1925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper/*===---- avx2intrin.h - AVX2 intrinsics -----------------------------------=== 2925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * 3925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * Permission is hereby granted, free of charge, to any person obtaining a copy 4925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * of this software and associated documentation files (the "Software"), to deal 5925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * in the Software without restriction, including without limitation the rights 6925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * copies of the Software, and to permit persons to whom the Software is 8925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * furnished to do so, subject to the following conditions: 9925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * 10925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * The above copyright notice and this permission notice shall be included in 11925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * all copies or substantial portions of the Software. 12925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * 13925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * THE SOFTWARE. 20925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * 21925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper *===-----------------------------------------------------------------------=== 22925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper */ 23925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 24925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper#ifndef __IMMINTRIN_H 25925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper#error "Never use <avx2intrin.h> directly; include <immintrin.h> instead." 26925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper#endif 27925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 287cb4fae8da8f541f43d39896c989b06c69fd7821Richard Smith#ifndef __AVX2INTRIN_H 297cb4fae8da8f541f43d39896c989b06c69fd7821Richard Smith#define __AVX2INTRIN_H 307cb4fae8da8f541f43d39896c989b06c69fd7821Richard Smith 31925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper/* SSE4 Multiple Packed Sums of Absolute Difference. 
*/ 32925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper#define _mm256_mpsadbw_epu8(X, Y, M) __builtin_ia32_mpsadbw256((X), (Y), (M)) 33925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 34925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 354f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_abs_epi8(__m256i __a) 36925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 374f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a); 38925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 39925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 40925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 414f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_abs_epi16(__m256i __a) 42925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 434f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a); 44925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 45925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 46925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 474f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_abs_epi32(__m256i __a) 48925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 494f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pabsd256((__v8si)__a); 50925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 51925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 52925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 534f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_packs_epi16(__m256i __a, __m256i __b) 54925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 554f918aed75d4927e88365541c7200f0b5fe5014bDavid 
Blaikie return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b); 56925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 57925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 58925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 594f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_packs_epi32(__m256i __a, __m256i __b) 60925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 614f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b); 62925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 63925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 64925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 654f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_packus_epi16(__m256i __a, __m256i __b) 66925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 674f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b); 68925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 69925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 70925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 71925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_packus_epi32(__m256i __V1, __m256i __V2) 72925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 73925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2); 74925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 75925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 76925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 774f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_add_epi8(__m256i __a, __m256i __b) 
78925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 794f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v32qi)__a + (__v32qi)__b); 80925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 81925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 82925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 834f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_add_epi16(__m256i __a, __m256i __b) 84925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 854f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v16hi)__a + (__v16hi)__b); 86925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 87925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 88925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 894f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_add_epi32(__m256i __a, __m256i __b) 90925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 914f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v8si)__a + (__v8si)__b); 92925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 93925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 94925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 954f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_add_epi64(__m256i __a, __m256i __b) 96925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 974f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __a + __b; 98925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 99925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 100925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1014f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_adds_epi8(__m256i __a, __m256i __b) 
1029c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 1034f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_paddsb256((__v32qi)__a, (__v32qi)__b); 1049c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 1059c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 1069c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1074f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_adds_epi16(__m256i __a, __m256i __b) 1089c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 1094f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_paddsw256((__v16hi)__a, (__v16hi)__b); 1109c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 1119c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 1129c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1134f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_adds_epu8(__m256i __a, __m256i __b) 1149c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 1154f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_paddusb256((__v32qi)__a, (__v32qi)__b); 1169c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 1179c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 1189c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1194f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_adds_epu16(__m256i __a, __m256i __b) 1209c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 1214f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b); 1229c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 1239c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 1249c2ffd803af03f1728423d0d73ff87d988642633Craig Topper#define _mm256_alignr_epi8(a, b, n) __extension__ ({ \ 
1259c2ffd803af03f1728423d0d73ff87d988642633Craig Topper __m256i __a = (a); \ 1269c2ffd803af03f1728423d0d73ff87d988642633Craig Topper __m256i __b = (b); \ 1279c2ffd803af03f1728423d0d73ff87d988642633Craig Topper (__m256i)__builtin_ia32_palignr256((__v32qi)__a, (__v32qi)__b, (n)); }) 1289c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 1299c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1304f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_and_si256(__m256i __a, __m256i __b) 131735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper{ 1324f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __a & __b; 133735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper} 134735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper 135735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1364f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_andnot_si256(__m256i __a, __m256i __b) 137735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper{ 1384f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return ~__a & __b; 139735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper} 140735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper 141735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1424f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_avg_epu8(__m256i __a, __m256i __b) 1434c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1444f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b); 1454c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1464c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1474c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1484f918aed75d4927e88365541c7200f0b5fe5014bDavid 
Blaikie_mm256_avg_epu16(__m256i __a, __m256i __b) 1494c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1504f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b); 1514c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1524c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1534c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1544c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M) 1554c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1564c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2, 1574c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper (__v32qi)__M); 1584c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1594c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1604c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper#define _mm256_blend_epi16(V1, V2, M) __extension__ ({ \ 1614c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper __m256i __V1 = (V1); \ 1624c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper __m256i __V2 = (V2); \ 1635aeaca3fa755cddba583842e7a0c3e168bf71b4dCraig Topper (__m256i)__builtin_ia32_pblendw256((__v16hi)__V1, (__v16hi)__V2, (M)); }) 1644c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1654c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1664f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cmpeq_epi8(__m256i __a, __m256i __b) 1674c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1684f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v32qi)__a == (__v32qi)__b); 1694c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1704c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1714c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ 
__m256i __attribute__((__always_inline__, __nodebug__)) 1724f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cmpeq_epi16(__m256i __a, __m256i __b) 1734c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1744f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v16hi)__a == (__v16hi)__b); 1754c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1764c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1774c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1784f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cmpeq_epi32(__m256i __a, __m256i __b) 1794c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1804f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v8si)__a == (__v8si)__b); 1814c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1824c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1834c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1844f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cmpeq_epi64(__m256i __a, __m256i __b) 1854c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1864f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)(__a == __b); 1874c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1884c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1894c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1904f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cmpgt_epi8(__m256i __a, __m256i __b) 1914c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1924f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v32qi)__a > (__v32qi)__b); 1934c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1944c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1954c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ 
__m256i __attribute__((__always_inline__, __nodebug__)) 1964f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cmpgt_epi16(__m256i __a, __m256i __b) 1974c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1984f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v16hi)__a > (__v16hi)__b); 1994c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 2004c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 2014c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2024f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cmpgt_epi32(__m256i __a, __m256i __b) 2034c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 2044f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v8si)__a > (__v8si)__b); 2054c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 2064c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 2074c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2084f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cmpgt_epi64(__m256i __a, __m256i __b) 2094c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 2104f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)(__a > __b); 2114c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 2124c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 2134c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2144f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_hadd_epi16(__m256i __a, __m256i __b) 215318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{ 2164f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b); 217318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper} 218318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper 219318e460ada6e589bd864d9ecb86053cc6852cabfCraig 
Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2204f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_hadd_epi32(__m256i __a, __m256i __b) 221318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{ 2224f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b); 223318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper} 224318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper 225318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2264f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_hadds_epi16(__m256i __a, __m256i __b) 227318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{ 2284f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b); 229318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper} 230318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper 231318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2324f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_hsub_epi16(__m256i __a, __m256i __b) 233318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{ 2344f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b); 235318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper} 236318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper 237318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2384f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_hsub_epi32(__m256i __a, __m256i __b) 239318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{ 2404f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b); 241318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper} 
242318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper 243318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2444f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_hsubs_epi16(__m256i __a, __m256i __b) 245318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{ 2464f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b); 247318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper} 248318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper 249318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2504f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_maddubs_epi16(__m256i __a, __m256i __b) 2514a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper{ 2524f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b); 2534a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper} 2544a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper 2554a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2564f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_madd_epi16(__m256i __a, __m256i __b) 2574a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper{ 2584f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b); 2594a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper} 2604a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper 2614a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2624f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_max_epi8(__m256i __a, __m256i __b) 263231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 2644f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return 
(__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b); 265231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 266231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 267231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2684f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_max_epi16(__m256i __a, __m256i __b) 269231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 2704f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b); 271231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 272231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 273231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2744f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_max_epi32(__m256i __a, __m256i __b) 275231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 2764f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b); 277231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 278231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 279231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2804f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_max_epu8(__m256i __a, __m256i __b) 281231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 2824f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b); 283231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 284231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 285231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2864f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_max_epu16(__m256i __a, __m256i __b) 
287231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 2884f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b); 289231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 290231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 291231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2924f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_max_epu32(__m256i __a, __m256i __b) 293231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 2944f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b); 295231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 296231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 297231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2984f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_min_epi8(__m256i __a, __m256i __b) 299231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 3004f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b); 301231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 302231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 303231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 3044f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_min_epi16(__m256i __a, __m256i __b) 305231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 3064f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b); 307231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 308231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 309231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 
3104f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_min_epi32(__m256i __a, __m256i __b) 311231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 3124f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b); 313231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 314231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 315231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 3164f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_min_epu8(__m256i __a, __m256i __b) 317231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 3184f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b); 319231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 320231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 321231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 3224f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_min_epu16(__m256i __a, __m256i __b) 323231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 3244f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b); 325231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 326231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 327231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 3284f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_min_epu32(__m256i __a, __m256i __b) 329231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 3304f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b); 331231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 332231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 
333231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 3344f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_movemask_epi8(__m256i __a) 335231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 3364f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_pmovmskb256((__v32qi)__a); 337231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 338231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 339231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 340231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi8_epi16(__m128i __V) 341231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 342231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovsxbw256((__v16qi)__V); 343231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 344231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 345231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 346231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi8_epi32(__m128i __V) 347231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 348231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovsxbd256((__v16qi)__V); 349231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 350231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 351231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 352231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi8_epi64(__m128i __V) 353231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 354231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovsxbq256((__v16qi)__V); 355231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 356231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 
357231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 358231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi16_epi32(__m128i __V) 359231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 360231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovsxwd256((__v8hi)__V); 361231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 362231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 363231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 364231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi16_epi64(__m128i __V) 365231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 366231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovsxwq256((__v8hi)__V); 367231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 368231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 369231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 370231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi32_epi64(__m128i __V) 371231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 372231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovsxdq256((__v4si)__V); 373231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 374231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 375231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 376231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu8_epi16(__m128i __V) 377231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 378231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovzxbw256((__v16qi)__V); 379231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 
380231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 381231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 382231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu8_epi32(__m128i __V) 383231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 384231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovzxbd256((__v16qi)__V); 385231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 386231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 387231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 388231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu8_epi64(__m128i __V) 389231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 390231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovzxbq256((__v16qi)__V); 391231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 392231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 393231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 394231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu16_epi32(__m128i __V) 395231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 396231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovzxwd256((__v8hi)__V); 397231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 398231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 399231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 400231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu16_epi64(__m128i __V) 401231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 402231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovzxwq256((__v8hi)__V); 403231f793326e3a3ad6e07949adb776f45c07f0f7bCraig 
Topper} 404231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 405231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 406231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu32_epi64(__m128i __V) 407231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 408231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovzxdq256((__v4si)__V); 409231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 410231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 41128a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 4124f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_mul_epi32(__m256i __a, __m256i __b) 41328a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 4144f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b); 41528a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 41628a324a30b0677309a4c5d73ef5197398265e129Craig Topper 41728a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 4184f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_mulhrs_epi16(__m256i __a, __m256i __b) 41928a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 4204f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b); 42128a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 42228a324a30b0677309a4c5d73ef5197398265e129Craig Topper 42328a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 4244f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_mulhi_epu16(__m256i __a, __m256i __b) 42528a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 4264f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return 
(__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b); 42728a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 42828a324a30b0677309a4c5d73ef5197398265e129Craig Topper 42928a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 4304f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_mulhi_epi16(__m256i __a, __m256i __b) 43128a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 4324f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b); 43328a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 43428a324a30b0677309a4c5d73ef5197398265e129Craig Topper 43528a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 4364f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_mullo_epi16(__m256i __a, __m256i __b) 43728a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 4384f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v16hi)__a * (__v16hi)__b); 43928a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 44028a324a30b0677309a4c5d73ef5197398265e129Craig Topper 44128a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 4424f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_mullo_epi32 (__m256i __a, __m256i __b) 44328a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 4444f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v8si)__a * (__v8si)__b); 44528a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 44628a324a30b0677309a4c5d73ef5197398265e129Craig Topper 44728a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 4484f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_mul_epu32(__m256i __a, __m256i __b) 44928a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 
4504f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b); 45128a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 45228a324a30b0677309a4c5d73ef5197398265e129Craig Topper 453231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 4544f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_or_si256(__m256i __a, __m256i __b) 455735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper{ 4564f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __a | __b; 457735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper} 458735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper 459735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 4604f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sad_epu8(__m256i __a, __m256i __b) 461cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 4624f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b); 463cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 464cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 465cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 4664f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_shuffle_epi8(__m256i __a, __m256i __b) 467cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 4684f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b); 469cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 470cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 471cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper#define _mm256_shuffle_epi32(a, imm) __extension__ ({ \ 472cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper __m256i __a = (a); \ 473cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 
(__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)_mm256_set1_epi32(0), \ 474cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (imm) & 0x3, ((imm) & 0xc) >> 2, \ 475cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \ 476cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0x03) >> 0), \ 477cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0x0c) >> 2), \ 478cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0x30) >> 4), \ 479cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0xc0) >> 6)); }) 480cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 481cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper#define _mm256_shufflehi_epi16(a, imm) __extension__ ({ \ 482cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper __m256i __a = (a); \ 483cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)_mm256_set1_epi16(0), \ 484cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 0, 1, 2, 3, \ 485cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0x03) >> 0), \ 486cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0x0c) >> 2), \ 487cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0x30) >> 4), \ 488cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0xc0) >> 6), \ 489cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 8, 9, 10, 11, \ 490cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 12 + (((imm) & 0x03) >> 0), \ 491cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 12 + (((imm) & 0x0c) >> 2), \ 492cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 12 + (((imm) & 0x30) >> 4), \ 493cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 12 + (((imm) & 0xc0) >> 6)); }) 494cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 495cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper#define _mm256_shufflelo_epi16(a, imm) __extension__ ({ \ 
496cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper __m256i __a = (a); \ 497cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)_mm256_set1_epi16(0), \ 498cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (imm) & 0x3,((imm) & 0xc) >> 2, \ 499cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \ 500cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4, 5, 6, 7, \ 501cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 8 + (((imm) & 0x03) >> 0), \ 502cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 8 + (((imm) & 0x0c) >> 2), \ 503cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 8 + (((imm) & 0x30) >> 4), \ 504cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 8 + (((imm) & 0xc0) >> 6), \ 505cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 12, 13, 14, 15); }) 506cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 507cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 5084f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sign_epi8(__m256i __a, __m256i __b) 509cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 5104f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b); 511cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 512cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 513cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 5144f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sign_epi16(__m256i __a, __m256i __b) 515cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 5164f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b); 517cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 518cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 
519cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 5204f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sign_epi32(__m256i __a, __m256i __b) 521cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 5224f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b); 523cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 524cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 525cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper#define _mm256_slli_si256(a, count) __extension__ ({ \ 526cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper __m256i __a = (a); \ 527cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (__m256i)__builtin_ia32_pslldqi256(__a, (count)*8); }) 528cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 529cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 5304f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_slli_epi16(__m256i __a, int __count) 531cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 5324f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count); 533cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 534cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 535cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 5364f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sll_epi16(__m256i __a, __m128i __count) 537cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 5384f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count); 539cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 540cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 541cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic 
__inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 5424f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_slli_epi32(__m256i __a, int __count) 543cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 5444f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count); 545cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 546cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 547cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 5484f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sll_epi32(__m256i __a, __m128i __count) 549cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 5504f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count); 551cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 552cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 553cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 5544f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_slli_epi64(__m256i __a, int __count) 555cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 5564f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_psllqi256(__a, __count); 557cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 558cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 559cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 5604f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sll_epi64(__m256i __a, __m128i __count) 561cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 5624f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_psllq256(__a, __count); 563cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 564cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 
565cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 5664f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_srai_epi16(__m256i __a, int __count) 567cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 5684f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count); 569cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 570cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 571cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 5724f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sra_epi16(__m256i __a, __m128i __count) 573cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 5744f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count); 575cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 576cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 577cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 5784f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_srai_epi32(__m256i __a, int __count) 579cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 5804f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count); 581cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 582cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 583cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 5844f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sra_epi32(__m256i __a, __m128i __count) 585cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 5864f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count); 
587cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 588cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 589cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper#define _mm256_srli_si256(a, count) __extension__ ({ \ 590cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper __m256i __a = (a); \ 591cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (__m256i)__builtin_ia32_psrldqi256(__a, (count)*8); }) 592cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 593cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 5944f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_srli_epi16(__m256i __a, int __count) 595cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 5964f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count); 597cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 598cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 599cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6004f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_srl_epi16(__m256i __a, __m128i __count) 601cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 6024f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count); 603cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 604cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 605cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6064f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_srli_epi32(__m256i __a, int __count) 607cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 6084f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count); 609cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 
610cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 611cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6124f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_srl_epi32(__m256i __a, __m128i __count) 613cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 6144f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count); 615cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 616cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 617cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6184f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_srli_epi64(__m256i __a, int __count) 619cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 6204f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_psrlqi256(__a, __count); 621cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 622cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 623cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6244f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_srl_epi64(__m256i __a, __m128i __count) 625cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 6264f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_psrlq256(__a, __count); 627cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 628cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 629cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6304f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sub_epi8(__m256i __a, __m256i __b) 631925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 6324f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v32qi)__a - (__v32qi)__b); 
633925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 634925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 635925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6364f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sub_epi16(__m256i __a, __m256i __b) 637925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 6384f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v16hi)__a - (__v16hi)__b); 639925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 640925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 641925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6424f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sub_epi32(__m256i __a, __m256i __b) 643925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 6444f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v8si)__a - (__v8si)__b); 645925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 646925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 647925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6484f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sub_epi64(__m256i __a, __m256i __b) 649925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 6504f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __a - __b; 651925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 6529c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 6539c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6544f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_subs_epi8(__m256i __a, __m256i __b) 6559c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 6564f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psubsb256((__v32qi)__a, (__v32qi)__b); 
6579c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 6589c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 6599c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6604f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_subs_epi16(__m256i __a, __m256i __b) 6619c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 6624f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psubsw256((__v16hi)__a, (__v16hi)__b); 6639c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 6649c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 6659c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6664f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_subs_epu8(__m256i __a, __m256i __b) 6679c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 6684f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psubusb256((__v32qi)__a, (__v32qi)__b); 6699c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 6709c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 6719c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6724f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_subs_epu16(__m256i __a, __m256i __b) 6739c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 6744f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psubusw256((__v16hi)__a, (__v16hi)__b); 6759c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 6769c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 677735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6784f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_unpackhi_epi8(__m256i __a, __m256i __b) 6797f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{ 
6804f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31); 6817f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper} 6827f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper 6837f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6844f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_unpackhi_epi16(__m256i __a, __m256i __b) 6857f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{ 6864f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15); 6877f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper} 6887f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper 6897f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6904f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_unpackhi_epi32(__m256i __a, __m256i __b) 6917f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{ 6924f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7); 6937f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper} 6947f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper 6957f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6964f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_unpackhi_epi64(__m256i __a, __m256i __b) 6977f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{ 6984f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_shufflevector(__a, __b, 1, 4+1, 3, 4+3); 
6997f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper} 7007f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper 7017f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 7024f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_unpacklo_epi8(__m256i __a, __m256i __b) 7037f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{ 7044f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23); 7057f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper} 7067f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper 7077f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 7084f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_unpacklo_epi16(__m256i __a, __m256i __b) 7097f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{ 7104f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11); 7117f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper} 7127f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper 7137f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 7144f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_unpacklo_epi32(__m256i __a, __m256i __b) 7157f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{ 7164f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5); 7177f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper} 7187f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper 
7197f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 7204f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_unpacklo_epi64(__m256i __a, __m256i __b) 7217f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{ 7224f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_shufflevector(__a, __b, 0, 4+0, 2, 4+2); 7237f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper} 7247f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper 7257f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 7264f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_xor_si256(__m256i __a, __m256i __b) 727735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper{ 7284f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __a ^ __b; 729735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper} 730ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper 731ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 732ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper_mm256_stream_load_si256(__m256i *__V) 733ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper{ 734ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper return (__m256i)__builtin_ia32_movntdqa256((__v4di *)__V); 735ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper} 736ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper 737ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topperstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 738ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper_mm_broadcastss_ps(__m128 __X) 739ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper{ 740ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper return (__m128)__builtin_ia32_vbroadcastss_ps((__v4sf)__X); 741ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper} 
742ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper 743ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topperstatic __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 744ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper_mm256_broadcastss_ps(__m128 __X) 745ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper{ 746ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper return (__m256)__builtin_ia32_vbroadcastss_ps256((__v4sf)__X); 747ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper} 748ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper 749ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topperstatic __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) 750ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper_mm256_broadcastsd_pd(__m128d __X) 751ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper{ 752ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper return (__m256d)__builtin_ia32_vbroadcastsd_pd256((__v2df)__X); 753ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper} 754ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper 755ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 7564f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_broadcastsi128_si256(__m128i const *__a) 757ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper{ 7584f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_vbroadcastsi256(__a); 759ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper} 760ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper 761ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper#define _mm_blend_epi32(V1, V2, M) __extension__ ({ \ 762ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper __m128i __V1 = (V1); \ 763ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper __m128i __V2 = (V2); \ 76434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper (__m128i)__builtin_ia32_pblendd128((__v4si)__V1, (__v4si)__V2, (M)); }) 
765ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper 766ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper#define _mm256_blend_epi32(V1, V2, M) __extension__ ({ \ 767ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper __m256i __V1 = (V1); \ 768ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper __m256i __V2 = (V2); \ 76934a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper (__m256i)__builtin_ia32_pblendd256((__v8si)__V1, (__v8si)__V2, (M)); }) 77034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 77134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 77234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm256_broadcastb_epi8(__m128i __X) 77334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 77434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper return (__m256i)__builtin_ia32_pbroadcastb256((__v16qi)__X); 77534a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 77634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 77734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 77834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm256_broadcastw_epi16(__m128i __X) 77934a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 78034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper return (__m256i)__builtin_ia32_pbroadcastw256((__v8hi)__X); 78134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 78234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 78334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 78434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm256_broadcastd_epi32(__m128i __X) 78534a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 78634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper return (__m256i)__builtin_ia32_pbroadcastd256((__v4si)__X); 78734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 
78834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 78934a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 79034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm256_broadcastq_epi64(__m128i __X) 79134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 79234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper return (__m256i)__builtin_ia32_pbroadcastq256(__X); 79334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 79434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 79534a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 79634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm_broadcastb_epi8(__m128i __X) 79734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 79834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper return (__m128i)__builtin_ia32_pbroadcastb128((__v16qi)__X); 79934a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 80034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 80134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 80234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm_broadcastw_epi16(__m128i __X) 80334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 80434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper return (__m128i)__builtin_ia32_pbroadcastw128((__v8hi)__X); 80534a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 80634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 80734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 80834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 80934a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm_broadcastd_epi32(__m128i __X) 81034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 81134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper return 
(__m128i)__builtin_ia32_pbroadcastd128((__v4si)__X); 81234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 81334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 81434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 81534a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm_broadcastq_epi64(__m128i __X) 81634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 81734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper return (__m128i)__builtin_ia32_pbroadcastq128(__X); 81834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 81934a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 82034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 8214f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) 82234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 8234f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b); 82434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 82534a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 82634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper#define _mm256_permute4x64_pd(V, M) __extension__ ({ \ 82734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper __m256d __V = (V); \ 828b5491f3d7b688b37745397fafd6c1f10548fd5c2Craig Topper (__m256d)__builtin_shufflevector((__v4df)__V, (__v4df) _mm256_setzero_pd(), \ 829b5491f3d7b688b37745397fafd6c1f10548fd5c2Craig Topper (M) & 0x3, ((M) & 0xc) >> 2, \ 830b5491f3d7b688b37745397fafd6c1f10548fd5c2Craig Topper ((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); }) 83134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 83234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 8334f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_permutevar8x32_ps(__m256 __a, __m256 __b) 
83434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 8354f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8sf)__b); 83634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 83734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 83834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper#define _mm256_permute4x64_epi64(V, M) __extension__ ({ \ 83934a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper __m256i __V = (V); \ 840b5491f3d7b688b37745397fafd6c1f10548fd5c2Craig Topper (__m256i)__builtin_shufflevector((__v4di)__V, (__v4di) _mm256_setzero_si256(), \ 841b5491f3d7b688b37745397fafd6c1f10548fd5c2Craig Topper (M) & 0x3, ((M) & 0xc) >> 2, \ 842b5491f3d7b688b37745397fafd6c1f10548fd5c2Craig Topper ((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); }) 84334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 84434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper#define _mm256_permute2x128_si256(V1, V2, M) __extension__ ({ \ 84534a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper __m256i __V1 = (V1); \ 84634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper __m256i __V2 = (V2); \ 84749a110db4c43835681bb89671f8f73c8d8c7c28cCraig Topper (__m256i)__builtin_ia32_permti256(__V1, __V2, (M)); }) 8485cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 8495cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper#define _mm256_extracti128_si256(A, O) __extension__ ({ \ 8505cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper __m256i __A = (A); \ 8515cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper (__m128i)__builtin_ia32_extract128i256(__A, (O)); }) 8525cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 8535cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper#define _mm256_inserti128_si256(V1, V2, O) __extension__ ({ \ 8545cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper __m256i __V1 = (V1); \ 8555cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper __m128i __V2 = (V2); \ 8565cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 
(__m256i)__builtin_ia32_insert128i256(__V1, __V2, (O)); }) 8575cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 8585cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 8595cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_maskload_epi32(int const *__X, __m256i __M) 8605cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 8615cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m256i)__builtin_ia32_maskloadd256((const __v8si *)__X, (__v8si)__M); 8625cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 8635cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 8645cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 8655cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_maskload_epi64(long long const *__X, __m256i __M) 8665cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 8675cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, __M); 8685cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 8695cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 8705cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 8715cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_maskload_epi32(int const *__X, __m128i __M) 8725cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 8735cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m128i)__builtin_ia32_maskloadd((const __v4si *)__X, (__v4si)__M); 8745cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 8755cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 8765cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 8775cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_maskload_epi64(long long const *__X, __m128i __M) 
8785cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 8795cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m128i)__builtin_ia32_maskloadq((const __v2di *)__X, (__v2di)__M); 8805cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 8815cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 8825cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 8835cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y) 8845cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 8855cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper __builtin_ia32_maskstored256((__v8si *)__X, (__v8si)__M, (__v8si)__Y); 8865cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 8875cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 8885cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 8895cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y) 8905cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 8915cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper __builtin_ia32_maskstoreq256((__v4di *)__X, __M, __Y); 8925cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 8935cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 8945cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 8955cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_maskstore_epi32(int *__X, __m128i __M, __m128i __Y) 8965cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 8975cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper __builtin_ia32_maskstored((__v4si *)__X, (__v4si)__M, (__v4si)__Y); 8985cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 8995cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9005cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ void 
__attribute__((__always_inline__, __nodebug__)) 9015cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y) 9025cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9035cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper __builtin_ia32_maskstoreq(( __v2di *)__X, __M, __Y); 9045cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9055cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9065cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 9075cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_sllv_epi32(__m256i __X, __m256i __Y) 9085cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9095cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y); 9105cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9115cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9125cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 9135cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_sllv_epi32(__m128i __X, __m128i __Y) 9145cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9155cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y); 9165cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9175cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9185cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 9195cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_sllv_epi64(__m256i __X, __m256i __Y) 9205cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9215cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m256i)__builtin_ia32_psllv4di(__X, __Y); 9225cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9235cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 
9245cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 9255cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_sllv_epi64(__m128i __X, __m128i __Y) 9265cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9275cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m128i)__builtin_ia32_psllv2di(__X, __Y); 9285cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9295cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9305cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 9315cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_srav_epi32(__m256i __X, __m256i __Y) 9325cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9335cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y); 9345cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9355cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9365cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 9375cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_srav_epi32(__m128i __X, __m128i __Y) 9385cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9395cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y); 9405cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9415cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9425cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 9435cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_srlv_epi32(__m256i __X, __m256i __Y) 9445cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9455cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y); 9465cbd751a2f9d73248c5336140d73680fcd4669a3Craig 
Topper} 9475cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9485cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 9495cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_srlv_epi32(__m128i __X, __m128i __Y) 9505cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9515cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y); 9525cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9535cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9545cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 9555cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_srlv_epi64(__m256i __X, __m256i __Y) 9565cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9575cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m256i)__builtin_ia32_psrlv4di(__X, __Y); 9585cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9595cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9605cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 9615cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_srlv_epi64(__m128i __X, __m128i __Y) 9625cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9635cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m128i)__builtin_ia32_psrlv2di(__X, __Y); 9645cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9655283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren 9665283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren#define _mm_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \ 9675283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128d __a = (a); \ 9685283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren double const *__m = (m); \ 9695283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128i __i = (i); \ 9705283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128d 
__mask = (mask); \ 9715283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__m128d)__builtin_ia32_gatherd_pd((__v2df)__a, (const __v2df *)__m, \ 9725283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__v4si)__i, (__v2df)__mask, (s)); }) 9735283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren 9745283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren#define _mm256_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \ 9755283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m256d __a = (a); \ 9765283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren double const *__m = (m); \ 977c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren __m128i __i = (i); \ 9785283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m256d __mask = (mask); \ 9795283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__m256d)__builtin_ia32_gatherd_pd256((__v4df)__a, (const __v4df *)__m, \ 980c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren (__v4si)__i, (__v4df)__mask, (s)); }) 9815283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren 9825283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren#define _mm_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \ 9835283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128d __a = (a); \ 9845283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren double const *__m = (m); \ 9855283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128i __i = (i); \ 9865283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128d __mask = (mask); \ 9875283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__m128d)__builtin_ia32_gatherq_pd((__v2df)__a, (const __v2df *)__m, \ 9885283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__v2di)__i, (__v2df)__mask, (s)); }) 9895283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren 9905283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren#define _mm256_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \ 9915283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m256d __a = (a); \ 9925283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren double const *__m = (m); \ 
9935283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m256i __i = (i); \ 9945283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m256d __mask = (mask); \ 9955283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__m256d)__builtin_ia32_gatherq_pd256((__v4df)__a, (const __v4df *)__m, \ 9965283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__v4di)__i, (__v4df)__mask, (s)); }) 9975283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren 9985283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren#define _mm_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \ 9995283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128 __a = (a); \ 10005283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren float const *__m = (m); \ 10015283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128i __i = (i); \ 10025283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128 __mask = (mask); \ 10035283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__m128)__builtin_ia32_gatherd_ps((__v4sf)__a, (const __v4sf *)__m, \ 10045283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__v4si)__i, (__v4sf)__mask, (s)); }) 10055283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren 10065283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren#define _mm256_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \ 10075283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m256 __a = (a); \ 10085283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren float const *__m = (m); \ 10095283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m256i __i = (i); \ 10105283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m256 __mask = (mask); \ 10115283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__m256)__builtin_ia32_gatherd_ps256((__v8sf)__a, (const __v8sf *)__m, \ 10125283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__v8si)__i, (__v8sf)__mask, (s)); }) 10135283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren 10145283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren#define _mm_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \ 
10155283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128 __a = (a); \ 10165283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren float const *__m = (m); \ 10175283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128i __i = (i); \ 10185283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128 __mask = (mask); \ 10195283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__m128)__builtin_ia32_gatherq_ps((__v4sf)__a, (const __v4sf *)__m, \ 10205283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__v2di)__i, (__v4sf)__mask, (s)); }) 10215283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren 10225283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren#define _mm256_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \ 1023c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren __m128 __a = (a); \ 10245283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren float const *__m = (m); \ 10255283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m256i __i = (i); \ 1026c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren __m128 __mask = (mask); \ 1027c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren (__m128)__builtin_ia32_gatherq_ps256((__v4sf)__a, (const __v4sf *)__m, \ 1028c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren (__v4di)__i, (__v4sf)__mask, (s)); }) 1029c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren 1030c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren#define _mm_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \ 1031c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren __m128i __a = (a); \ 1032c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren int const *__m = (m); \ 1033c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren __m128i __i = (i); \ 1034c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren __m128i __mask = (mask); \ 1035c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren (__m128i)__builtin_ia32_gatherd_d((__v4si)__a, (const __v4si *)__m, \ 1036c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren (__v4si)__i, (__v4si)__mask, (s)); }) 

/* Masked integer gathers.  Each macro copies a (pass-through source), m (base
   pointer), i (index vector) and mask into typed locals, so every argument is
   evaluated exactly once.  It then forwards them to the matching
   __builtin_ia32_gather* builtin.  The scale s is passed straight through
   without being copied into a local. */
#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \
  __m256i __a = (a); \
  int const *__m = (m); \
  __m256i __i = (i); \
  __m256i __mask = (mask); \
  (__m256i)__builtin_ia32_gatherd_d256((__v8si)__a, (const __v8si *)__m, \
            (__v8si)__i, (__v8si)__mask, (s)); })

#define _mm_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \
  __m128i __a = (a); \
  int const *__m = (m); \
  __m128i __i = (i); \
  __m128i __mask = (mask); \
  (__m128i)__builtin_ia32_gatherq_d((__v4si)__a, (const __v4si *)__m, \
            (__v2di)__i, (__v4si)__mask, (s)); })

/* The 256-bit form takes a __m256i index but a __m128i source and mask, and
   yields a __m128i. */
#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \
  __m128i __a = (a); \
  int const *__m = (m); \
  __m256i __i = (i); \
  __m128i __mask = (mask); \
  (__m128i)__builtin_ia32_gatherq_d256((__v4si)__a, (const __v4si *)__m, \
            (__v4di)__i, (__v4si)__mask, (s)); })

/* 64-bit element gathers.  The base pointer is held as long long const *,
   matching the 64-bit element type handed to the builtin (__v2di / __v4di).
   The explicit cast on (m) keeps callers that pass int const * compiling,
   while also accepting long long const * without a pointer-type diagnostic. */
#define _mm_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \
  __m128i __a = (a); \
  long long const *__m = (long long const *)(m); \
  __m128i __i = (i); \
  __m128i __mask = (mask); \
  (__m128i)__builtin_ia32_gatherd_q((__v2di)__a, (const __v2di *)__m, \
             (__v4si)__i, (__v2di)__mask, (s)); })

#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \
  __m256i __a = (a); \
  long long const *__m = (long long const *)(m); \
  __m128i __i = (i); \
  __m256i __mask = (mask); \
  (__m256i)__builtin_ia32_gatherd_q256((__v4di)__a, (const __v4di *)__m, \
             (__v4si)__i, (__v4di)__mask, (s)); })

#define _mm_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \
  __m128i __a = (a); \
  long long const *__m = (long long const *)(m); \
  __m128i __i = (i); \
  __m128i __mask = (mask); \
  (__m128i)__builtin_ia32_gatherq_q((__v2di)__a, (const __v2di *)__m, \
             (__v2di)__i, (__v2di)__mask, (s)); })

#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \
  __m256i __a = (a); \
  long long const *__m = (long long const *)(m); \
  __m256i __i = (i); \
  __m256i __mask = (mask); \
  (__m256i)__builtin_ia32_gatherq_q256((__v4di)__a, (const __v4di *)__m, \
             (__v4di)__i, (__v4di)__mask, (s)); })

/* Unmasked gathers.  These call the same builtins as the masked forms, with a
   zero vector as the pass-through source and a constant mask.  For the pd/ps
   forms the mask is the integer -1 converted to floating point (i.e. -1.0) in
   every element; for the integer forms it is -1 in every element.
   NOTE(review): the floating-point mask relies on the builtin looking only at
   each element's sign bit - confirm against the gather instruction
   reference. */
#define _mm_i32gather_pd(m, i, s) __extension__ ({ \
  double const *__m = (m); \
  __m128i __i = (i); \
  (__m128d)__builtin_ia32_gatherd_pd((__v2df)_mm_setzero_pd(), \
             (const __v2df *)__m, (__v4si)__i, \
             (__v2df)_mm_set1_pd((double)(long long int)-1), (s)); })

#define _mm256_i32gather_pd(m, i, s) __extension__ ({ \
  double const *__m = (m); \
  __m128i __i = (i); \
  (__m256d)__builtin_ia32_gatherd_pd256((__v4df)_mm256_setzero_pd(), \
             (const __v4df *)__m, (__v4si)__i, \
             (__v4df)_mm256_set1_pd((double)(long long int)-1), (s)); })

#define _mm_i64gather_pd(m, i, s) __extension__ ({ \
  double const *__m = (m); \
  __m128i __i = (i); \
  (__m128d)__builtin_ia32_gatherq_pd((__v2df)_mm_setzero_pd(), \
             (const __v2df *)__m, (__v2di)__i, \
             (__v2df)_mm_set1_pd((double)(long long int)-1), (s)); })

#define _mm256_i64gather_pd(m, i, s) __extension__ ({ \
  double const *__m = (m); \
  __m256i __i = (i); \
  (__m256d)__builtin_ia32_gatherq_pd256((__v4df)_mm256_setzero_pd(), \
             (const __v4df *)__m, (__v4di)__i, \
             (__v4df)_mm256_set1_pd((double)(long long int)-1), (s)); })

#define _mm_i32gather_ps(m, i, s) __extension__ ({ \
  float const *__m = (m); \
  __m128i __i = (i); \
  (__m128)__builtin_ia32_gatherd_ps((__v4sf)_mm_setzero_ps(), \
             (const __v4sf *)__m, (__v4si)__i, \
             (__v4sf)_mm_set1_ps((float)(int)-1), (s)); })

#define _mm256_i32gather_ps(m, i, s) __extension__ ({ \
  float const *__m = (m); \
  __m256i __i = (i); \
  (__m256)__builtin_ia32_gatherd_ps256((__v8sf)_mm256_setzero_ps(), \
             (const __v8sf *)__m, (__v8si)__i, \
             (__v8sf)_mm256_set1_ps((float)(int)-1), (s)); })

#define _mm_i64gather_ps(m, i, s) __extension__ ({ \
  float const *__m = (m); \
  __m128i __i = (i); \
  (__m128)__builtin_ia32_gatherq_ps((__v4sf)_mm_setzero_ps(), \
             (const __v4sf *)__m, (__v2di)__i, \
             (__v4sf)_mm_set1_ps((float)(int)-1), (s)); })

/* 256-bit index, 128-bit result: the source and mask are 128-bit vectors. */
#define _mm256_i64gather_ps(m, i, s) __extension__ ({ \
  float const *__m = (m); \
  __m256i __i = (i); \
  (__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_setzero_ps(), \
             (const __v4sf *)__m, (__v4di)__i, \
             (__v4sf)_mm_set1_ps((float)(int)-1), (s)); })

#define _mm_i32gather_epi32(m, i, s) __extension__ ({ \
  int const *__m = (m); \
  __m128i __i = (i); \
  (__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_setzero_si128(), \
            (const __v4si *)__m, (__v4si)__i, \
            (__v4si)_mm_set1_epi32(-1), (s)); })

#define _mm256_i32gather_epi32(m, i, s) __extension__ ({ \
  int const *__m = (m); \
  __m256i __i = (i); \
  (__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_setzero_si256(), \
            (const __v8si *)__m, (__v8si)__i, \
            (__v8si)_mm256_set1_epi32(-1), (s)); })

#define _mm_i64gather_epi32(m, i, s) __extension__ ({ \
  int const *__m = (m); \
  __m128i __i = (i); \
  (__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_setzero_si128(), \
            (const __v4si *)__m, (__v2di)__i, \
            (__v4si)_mm_set1_epi32(-1), (s)); })

/* 256-bit index, 128-bit result: the source and mask are 128-bit vectors. */
#define _mm256_i64gather_epi32(m, i, s) __extension__ ({ \
  int const *__m = (m); \
  __m256i __i = (i); \
  (__m128i)__builtin_ia32_gatherq_d256((__v4si)_mm_setzero_si128(), \
            (const __v4si *)__m, (__v4di)__i, \
            (__v4si)_mm_set1_epi32(-1), (s)); })

/* Unmasked 64-bit element gathers.  The base pointer is held as
   long long const *, matching the 64-bit element type handed to the builtin;
   the explicit cast keeps callers that pass int const * compiling. */
#define _mm_i32gather_epi64(m, i, s) __extension__ ({ \
  long long const *__m = (long long const *)(m); \
  __m128i __i = (i); \
  (__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_setzero_si128(), \
             (const __v2di *)__m, (__v4si)__i, \
             (__v2di)_mm_set1_epi64x(-1), (s)); })

#define _mm256_i32gather_epi64(m, i, s) __extension__ ({ \
  long long const *__m = (long long const *)(m); \
  __m128i __i = (i); \
  (__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_setzero_si256(), \
             (const __v4di *)__m, (__v4si)__i, \
             (__v4di)_mm256_set1_epi64x(-1), (s)); })
119156c045ed5e148d3eff9b344001370b80ec14d43bManman Ren 119256c045ed5e148d3eff9b344001370b80ec14d43bManman Ren#define _mm_i64gather_epi64(m, i, s) __extension__ ({ \ 119356c045ed5e148d3eff9b344001370b80ec14d43bManman Ren int const *__m = (m); \ 119456c045ed5e148d3eff9b344001370b80ec14d43bManman Ren __m128i __i = (i); \ 119556c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_setzero_si128(), \ 119656c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (const __v2di *)__m, (__v2di)__i, \ 119756c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (__v2di)_mm_set1_epi64x(-1), (s)); }) 119856c045ed5e148d3eff9b344001370b80ec14d43bManman Ren 119956c045ed5e148d3eff9b344001370b80ec14d43bManman Ren#define _mm256_i64gather_epi64(m, i, s) __extension__ ({ \ 120056c045ed5e148d3eff9b344001370b80ec14d43bManman Ren int const *__m = (m); \ 120156c045ed5e148d3eff9b344001370b80ec14d43bManman Ren __m256i __i = (i); \ 120256c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_setzero_si256(), \ 120356c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (const __v4di *)__m, (__v4di)__i, \ 120456c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (__v4di)_mm256_set1_epi64x(-1), (s)); }) 12057cb4fae8da8f541f43d39896c989b06c69fd7821Richard Smith 12067cb4fae8da8f541f43d39896c989b06c69fd7821Richard Smith#endif /* __AVX2INTRIN_H */ 1207