1925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper/*===---- avx2intrin.h - AVX2 intrinsics -----------------------------------=== 2925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * 3925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * Permission is hereby granted, free of charge, to any person obtaining a copy 4925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * of this software and associated documentation files (the "Software"), to deal 5925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * in the Software without restriction, including without limitation the rights 6925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * copies of the Software, and to permit persons to whom the Software is 8925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * furnished to do so, subject to the following conditions: 9925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * 10925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * The above copyright notice and this permission notice shall be included in 11925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * all copies or substantial portions of the Software. 12925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * 13925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * THE SOFTWARE. 20925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * 21925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper *===-----------------------------------------------------------------------=== 22925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper */ 23925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 24925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper#ifndef __IMMINTRIN_H 25925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper#error "Never use <avx2intrin.h> directly; include <immintrin.h> instead." 26925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper#endif 27925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 287cb4fae8da8f541f43d39896c989b06c69fd7821Richard Smith#ifndef __AVX2INTRIN_H 297cb4fae8da8f541f43d39896c989b06c69fd7821Richard Smith#define __AVX2INTRIN_H 307cb4fae8da8f541f43d39896c989b06c69fd7821Richard Smith 31925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper/* SSE4 Multiple Packed Sums of Absolute Difference. 
*/ 32925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper#define _mm256_mpsadbw_epu8(X, Y, M) __builtin_ia32_mpsadbw256((X), (Y), (M)) 33925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 34925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 354f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_abs_epi8(__m256i __a) 36925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 374f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a); 38925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 39925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 40925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 414f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_abs_epi16(__m256i __a) 42925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 434f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a); 44925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 45925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 46925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 474f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_abs_epi32(__m256i __a) 48925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 494f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pabsd256((__v8si)__a); 50925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 51925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 52925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 534f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_packs_epi16(__m256i __a, __m256i __b) 54925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 554f918aed75d4927e88365541c7200f0b5fe5014bDavid 
Blaikie return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b); 56925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 57925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 58925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 594f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_packs_epi32(__m256i __a, __m256i __b) 60925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 614f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b); 62925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 63925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 64925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 654f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_packus_epi16(__m256i __a, __m256i __b) 66925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 674f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b); 68925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 69925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 70925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 71925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_packus_epi32(__m256i __V1, __m256i __V2) 72925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 73925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2); 74925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 75925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 76925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 774f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_add_epi8(__m256i __a, __m256i __b) 
78925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 794f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v32qi)__a + (__v32qi)__b); 80925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 81925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 82925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 834f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_add_epi16(__m256i __a, __m256i __b) 84925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 854f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v16hi)__a + (__v16hi)__b); 86925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 87925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 88925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 894f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_add_epi32(__m256i __a, __m256i __b) 90925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 914f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v8si)__a + (__v8si)__b); 92925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 93925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 94925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 954f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_add_epi64(__m256i __a, __m256i __b) 96925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 974f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __a + __b; 98925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 99925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 100925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1014f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_adds_epi8(__m256i __a, __m256i __b) 
1029c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 1034f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_paddsb256((__v32qi)__a, (__v32qi)__b); 1049c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 1059c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 1069c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1074f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_adds_epi16(__m256i __a, __m256i __b) 1089c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 1094f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_paddsw256((__v16hi)__a, (__v16hi)__b); 1109c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 1119c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 1129c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1134f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_adds_epu8(__m256i __a, __m256i __b) 1149c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 1154f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_paddusb256((__v32qi)__a, (__v32qi)__b); 1169c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 1179c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 1189c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1194f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_adds_epu16(__m256i __a, __m256i __b) 1209c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 1214f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b); 1229c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 1239c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 1249c2ffd803af03f1728423d0d73ff87d988642633Craig Topper#define _mm256_alignr_epi8(a, b, n) __extension__ ({ \ 
1259c2ffd803af03f1728423d0d73ff87d988642633Craig Topper __m256i __a = (a); \ 1269c2ffd803af03f1728423d0d73ff87d988642633Craig Topper __m256i __b = (b); \ 1279c2ffd803af03f1728423d0d73ff87d988642633Craig Topper (__m256i)__builtin_ia32_palignr256((__v32qi)__a, (__v32qi)__b, (n)); }) 1289c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 1299c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1304f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_and_si256(__m256i __a, __m256i __b) 131735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper{ 1324f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __a & __b; 133735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper} 134735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper 135735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1364f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_andnot_si256(__m256i __a, __m256i __b) 137735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper{ 1384f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return ~__a & __b; 139735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper} 140735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper 141735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1424f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_avg_epu8(__m256i __a, __m256i __b) 1434c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1444f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b); 1454c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1464c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1474c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1484f918aed75d4927e88365541c7200f0b5fe5014bDavid 
Blaikie_mm256_avg_epu16(__m256i __a, __m256i __b) 1494c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1504f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b); 1514c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1524c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1534c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1544c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M) 1554c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1564c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2, 1574c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper (__v32qi)__M); 1584c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1594c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1604c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper#define _mm256_blend_epi16(V1, V2, M) __extension__ ({ \ 1614c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper __m256i __V1 = (V1); \ 1624c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper __m256i __V2 = (V2); \ 1636bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (__m256d)__builtin_shufflevector((__v16hi)__V1, (__v16hi)__V2, \ 1646bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x01) ? 16 : 0), \ 1656bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x02) ? 17 : 1), \ 1666bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x04) ? 18 : 2), \ 1676bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x08) ? 19 : 3), \ 1686bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x10) ? 20 : 4), \ 1696bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x20) ? 21 : 5), \ 1706bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x40) ? 
22 : 6), \ 1716bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x80) ? 23 : 7), \ 1726bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x01) ? 24 : 8), \ 1736bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x02) ? 25 : 9), \ 1746bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x04) ? 26 : 10), \ 1756bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x08) ? 27 : 11), \ 1766bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x10) ? 28 : 12), \ 1776bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x20) ? 29 : 13), \ 1786bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x40) ? 30 : 14), \ 1796bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x80) ? 31 : 15)); }) 1804c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1814c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1824f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cmpeq_epi8(__m256i __a, __m256i __b) 1834c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1844f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v32qi)__a == (__v32qi)__b); 1854c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1864c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1874c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1884f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cmpeq_epi16(__m256i __a, __m256i __b) 1894c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1904f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v16hi)__a == (__v16hi)__b); 1914c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1924c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1934c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 
1944f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cmpeq_epi32(__m256i __a, __m256i __b) 1954c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1964f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v8si)__a == (__v8si)__b); 1974c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1984c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1994c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2004f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cmpeq_epi64(__m256i __a, __m256i __b) 2014c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 2024f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)(__a == __b); 2034c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 2044c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 2054c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2064f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cmpgt_epi8(__m256i __a, __m256i __b) 2074c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 2084f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v32qi)__a > (__v32qi)__b); 2094c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 2104c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 2114c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2124f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cmpgt_epi16(__m256i __a, __m256i __b) 2134c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 2144f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v16hi)__a > (__v16hi)__b); 2154c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 2164c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 2174c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 
2184f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cmpgt_epi32(__m256i __a, __m256i __b) 2194c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 2204f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v8si)__a > (__v8si)__b); 2214c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 2224c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 2234c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2244f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cmpgt_epi64(__m256i __a, __m256i __b) 2254c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 2264f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)(__a > __b); 2274c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 2284c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 2294c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2304f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_hadd_epi16(__m256i __a, __m256i __b) 231318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{ 2324f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b); 233318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper} 234318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper 235318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2364f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_hadd_epi32(__m256i __a, __m256i __b) 237318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{ 2384f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b); 239318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper} 240318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper 241318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i 
__attribute__((__always_inline__, __nodebug__)) 2424f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_hadds_epi16(__m256i __a, __m256i __b) 243318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{ 2444f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b); 245318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper} 246318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper 247318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2484f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_hsub_epi16(__m256i __a, __m256i __b) 249318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{ 2504f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b); 251318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper} 252318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper 253318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2544f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_hsub_epi32(__m256i __a, __m256i __b) 255318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{ 2564f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b); 257318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper} 258318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper 259318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2604f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_hsubs_epi16(__m256i __a, __m256i __b) 261318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{ 2624f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b); 263318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper} 
264318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper 265318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2664f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_maddubs_epi16(__m256i __a, __m256i __b) 2674a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper{ 2684f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b); 2694a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper} 2704a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper 2714a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2724f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_madd_epi16(__m256i __a, __m256i __b) 2734a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper{ 2744f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b); 2754a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper} 2764a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper 2774a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2784f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_max_epi8(__m256i __a, __m256i __b) 279231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 2804f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b); 281231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 282231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 283231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2844f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_max_epi16(__m256i __a, __m256i __b) 285231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 2864f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return 
(__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b); 287231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 288231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 289231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2904f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_max_epi32(__m256i __a, __m256i __b) 291231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 2924f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b); 293231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 294231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 295231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2964f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_max_epu8(__m256i __a, __m256i __b) 297231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 2984f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b); 299231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 300231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 301231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 3024f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_max_epu16(__m256i __a, __m256i __b) 303231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 3044f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b); 305231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 306231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 307231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 3084f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_max_epu32(__m256i __a, __m256i __b) 
309231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 3104f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b); 311231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 312231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 313231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 3144f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_min_epi8(__m256i __a, __m256i __b) 315231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 3164f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b); 317231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 318231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 319231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 3204f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_min_epi16(__m256i __a, __m256i __b) 321231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 3224f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b); 323231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 324231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 325231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 3264f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_min_epi32(__m256i __a, __m256i __b) 327231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 3284f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b); 329231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 330231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 331231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 
3324f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_min_epu8(__m256i __a, __m256i __b) 333231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 3344f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b); 335231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 336231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 337231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 3384f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_min_epu16(__m256i __a, __m256i __b) 339231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 3404f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b); 341231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 342231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 343231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 3444f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_min_epu32(__m256i __a, __m256i __b) 345231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 3464f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b); 347231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 348231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 349231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 3504f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_movemask_epi8(__m256i __a) 351231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 3524f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_pmovmskb256((__v32qi)__a); 353231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 354231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 355231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic 
__inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 356231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi8_epi16(__m128i __V) 357231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 358231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovsxbw256((__v16qi)__V); 359231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 360231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 361231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 362231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi8_epi32(__m128i __V) 363231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 364231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovsxbd256((__v16qi)__V); 365231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 366231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 367231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 368231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi8_epi64(__m128i __V) 369231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 370231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovsxbq256((__v16qi)__V); 371231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 372231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 373231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 374231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi16_epi32(__m128i __V) 375231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 376231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovsxwd256((__v8hi)__V); 377231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 378231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 379231f793326e3a3ad6e07949adb776f45c07f0f7bCraig 
Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 380231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi16_epi64(__m128i __V) 381231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 382231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovsxwq256((__v8hi)__V); 383231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 384231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 385231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 386231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi32_epi64(__m128i __V) 387231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 388231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovsxdq256((__v4si)__V); 389231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 390231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 391231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 392231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu8_epi16(__m128i __V) 393231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 394231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovzxbw256((__v16qi)__V); 395231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 396231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 397231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 398231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu8_epi32(__m128i __V) 399231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 400231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovzxbd256((__v16qi)__V); 401231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 402231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 
403231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 404231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu8_epi64(__m128i __V) 405231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 406231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovzxbq256((__v16qi)__V); 407231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 408231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 409231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 410231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu16_epi32(__m128i __V) 411231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 412231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovzxwd256((__v8hi)__V); 413231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 414231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 415231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 416231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu16_epi64(__m128i __V) 417231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 418231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovzxwq256((__v8hi)__V); 419231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 420231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 421231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 422231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu32_epi64(__m128i __V) 423231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 424231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovzxdq256((__v4si)__V); 425231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 
426231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 42728a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 4284f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_mul_epi32(__m256i __a, __m256i __b) 42928a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 4304f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b); 43128a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 43228a324a30b0677309a4c5d73ef5197398265e129Craig Topper 43328a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 4344f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_mulhrs_epi16(__m256i __a, __m256i __b) 43528a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 4364f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b); 43728a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 43828a324a30b0677309a4c5d73ef5197398265e129Craig Topper 43928a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 4404f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_mulhi_epu16(__m256i __a, __m256i __b) 44128a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 4424f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b); 44328a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 44428a324a30b0677309a4c5d73ef5197398265e129Craig Topper 44528a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 4464f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_mulhi_epi16(__m256i __a, __m256i __b) 44728a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 4484f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return 
(__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b); 44928a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 45028a324a30b0677309a4c5d73ef5197398265e129Craig Topper 45128a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 4524f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_mullo_epi16(__m256i __a, __m256i __b) 45328a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 4544f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v16hi)__a * (__v16hi)__b); 45528a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 45628a324a30b0677309a4c5d73ef5197398265e129Craig Topper 45728a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 4584f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_mullo_epi32 (__m256i __a, __m256i __b) 45928a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 4604f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v8si)__a * (__v8si)__b); 46128a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 46228a324a30b0677309a4c5d73ef5197398265e129Craig Topper 46328a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 4644f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_mul_epu32(__m256i __a, __m256i __b) 46528a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 4664f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b); 46728a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 46828a324a30b0677309a4c5d73ef5197398265e129Craig Topper 469231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 4704f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_or_si256(__m256i __a, __m256i __b) 471735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper{ 
4724f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __a | __b; 473735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper} 474735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper 475735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 4764f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sad_epu8(__m256i __a, __m256i __b) 477cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 4784f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b); 479cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 480cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 481cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 4824f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_shuffle_epi8(__m256i __a, __m256i __b) 483cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 4844f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b); 485cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 486cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 487cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper#define _mm256_shuffle_epi32(a, imm) __extension__ ({ \ 488cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper __m256i __a = (a); \ 489cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)_mm256_set1_epi32(0), \ 490cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (imm) & 0x3, ((imm) & 0xc) >> 2, \ 491cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \ 492cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0x03) >> 0), \ 493cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0x0c) >> 2), \ 494cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0x30) >> 4), \ 
495cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0xc0) >> 6)); }) 496cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 497cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper#define _mm256_shufflehi_epi16(a, imm) __extension__ ({ \ 498cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper __m256i __a = (a); \ 499cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)_mm256_set1_epi16(0), \ 500cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 0, 1, 2, 3, \ 501cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0x03) >> 0), \ 502cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0x0c) >> 2), \ 503cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0x30) >> 4), \ 504cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0xc0) >> 6), \ 505cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 8, 9, 10, 11, \ 506cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 12 + (((imm) & 0x03) >> 0), \ 507cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 12 + (((imm) & 0x0c) >> 2), \ 508cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 12 + (((imm) & 0x30) >> 4), \ 509cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 12 + (((imm) & 0xc0) >> 6)); }) 510cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 511cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper#define _mm256_shufflelo_epi16(a, imm) __extension__ ({ \ 512cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper __m256i __a = (a); \ 513cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)_mm256_set1_epi16(0), \ 514cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (imm) & 0x3,((imm) & 0xc) >> 2, \ 515cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \ 516cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4, 5, 6, 7, \ 517cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 8 + 
(((imm) & 0x03) >> 0), \ 518cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 8 + (((imm) & 0x0c) >> 2), \ 519cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 8 + (((imm) & 0x30) >> 4), \ 520cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 8 + (((imm) & 0xc0) >> 6), \ 521cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 12, 13, 14, 15); }) 522cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 523cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 5244f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sign_epi8(__m256i __a, __m256i __b) 525cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 5264f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b); 527cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 528cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 529cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 5304f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sign_epi16(__m256i __a, __m256i __b) 531cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 5324f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b); 533cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 534cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 535cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 5364f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sign_epi32(__m256i __a, __m256i __b) 537cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 5384f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b); 539cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 540cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 
541cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper#define _mm256_slli_si256(a, count) __extension__ ({ \ 542cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper __m256i __a = (a); \ 543cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (__m256i)__builtin_ia32_pslldqi256(__a, (count)*8); }) 544cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 545cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 5464f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_slli_epi16(__m256i __a, int __count) 547cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 5484f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count); 549cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 550cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 551cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 5524f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sll_epi16(__m256i __a, __m128i __count) 553cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 5544f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count); 555cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 556cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 557cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 5584f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_slli_epi32(__m256i __a, int __count) 559cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 5604f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count); 561cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 562cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 563cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ 
__m256i __attribute__((__always_inline__, __nodebug__)) 5644f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sll_epi32(__m256i __a, __m128i __count) 565cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 5664f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count); 567cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 568cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 569cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 5704f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_slli_epi64(__m256i __a, int __count) 571cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 5724f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_psllqi256(__a, __count); 573cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 574cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 575cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 5764f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sll_epi64(__m256i __a, __m128i __count) 577cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 5784f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_psllq256(__a, __count); 579cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 580cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 581cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 5824f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_srai_epi16(__m256i __a, int __count) 583cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 5844f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count); 585cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 586cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 
587cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 5884f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sra_epi16(__m256i __a, __m128i __count) 589cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 5904f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count); 591cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 592cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 593cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 5944f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_srai_epi32(__m256i __a, int __count) 595cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 5964f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count); 597cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 598cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 599cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6004f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sra_epi32(__m256i __a, __m128i __count) 601cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 6024f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count); 603cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 604cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 605cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper#define _mm256_srli_si256(a, count) __extension__ ({ \ 606cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper __m256i __a = (a); \ 607cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (__m256i)__builtin_ia32_psrldqi256(__a, (count)*8); }) 608cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 609cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic 
__inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6104f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_srli_epi16(__m256i __a, int __count) 611cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 6124f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count); 613cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 614cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 615cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6164f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_srl_epi16(__m256i __a, __m128i __count) 617cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 6184f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count); 619cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 620cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 621cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6224f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_srli_epi32(__m256i __a, int __count) 623cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 6244f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count); 625cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 626cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 627cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6284f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_srl_epi32(__m256i __a, __m128i __count) 629cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 6304f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count); 631cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 
632cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 633cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6344f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_srli_epi64(__m256i __a, int __count) 635cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 6364f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_psrlqi256(__a, __count); 637cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 638cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 639cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6404f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_srl_epi64(__m256i __a, __m128i __count) 641cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 6424f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_psrlq256(__a, __count); 643cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 644cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 645cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6464f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sub_epi8(__m256i __a, __m256i __b) 647925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 6484f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v32qi)__a - (__v32qi)__b); 649925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 650925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 651925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6524f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sub_epi16(__m256i __a, __m256i __b) 653925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 6544f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v16hi)__a - (__v16hi)__b); 
655925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 656925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 657925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6584f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sub_epi32(__m256i __a, __m256i __b) 659925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 6604f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)((__v8si)__a - (__v8si)__b); 661925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 662925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 663925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6644f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sub_epi64(__m256i __a, __m256i __b) 665925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 6664f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __a - __b; 667925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 6689c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 6699c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6704f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_subs_epi8(__m256i __a, __m256i __b) 6719c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 6724f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psubsb256((__v32qi)__a, (__v32qi)__b); 6739c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 6749c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 6759c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6764f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_subs_epi16(__m256i __a, __m256i __b) 6779c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 6784f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return 
(__m256i)__builtin_ia32_psubsw256((__v16hi)__a, (__v16hi)__b); 6799c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 6809c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 6819c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6824f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_subs_epu8(__m256i __a, __m256i __b) 6839c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 6844f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psubusb256((__v32qi)__a, (__v32qi)__b); 6859c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 6869c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 6879c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6884f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_subs_epu16(__m256i __a, __m256i __b) 6899c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 6904f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_psubusw256((__v16hi)__a, (__v16hi)__b); 6919c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 6929c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 693735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6944f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_unpackhi_epi8(__m256i __a, __m256i __b) 6957f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{ 6964f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31); 6977f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper} 6987f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper 6997f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic 
__inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 7004f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_unpackhi_epi16(__m256i __a, __m256i __b) 7017f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{ 7024f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15); 7037f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper} 7047f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper 7057f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 7064f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_unpackhi_epi32(__m256i __a, __m256i __b) 7077f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{ 7084f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7); 7097f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper} 7107f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper 7117f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 7124f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_unpackhi_epi64(__m256i __a, __m256i __b) 7137f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{ 7144f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_shufflevector(__a, __b, 1, 4+1, 3, 4+3); 7157f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper} 7167f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper 7177f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 7184f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_unpacklo_epi8(__m256i __a, __m256i __b) 7197f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{ 7204f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return 
(__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23); 7217f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper} 7227f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper 7237f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 7244f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_unpacklo_epi16(__m256i __a, __m256i __b) 7257f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{ 7264f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11); 7277f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper} 7287f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper 7297f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 7304f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_unpacklo_epi32(__m256i __a, __m256i __b) 7317f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{ 7324f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5); 7337f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper} 7347f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper 7357f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 7364f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_unpacklo_epi64(__m256i __a, __m256i __b) 7377f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{ 7384f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_shufflevector(__a, __b, 0, 4+0, 2, 4+2); 7397f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper} 
7407f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper 7417f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 7424f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_xor_si256(__m256i __a, __m256i __b) 743735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper{ 7444f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __a ^ __b; 745735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper} 746ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper 747ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 748ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper_mm256_stream_load_si256(__m256i *__V) 749ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper{ 750ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper return (__m256i)__builtin_ia32_movntdqa256((__v4di *)__V); 751ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper} 752ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper 753ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topperstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 754ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper_mm_broadcastss_ps(__m128 __X) 755ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper{ 756ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper return (__m128)__builtin_ia32_vbroadcastss_ps((__v4sf)__X); 757ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper} 758ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper 759ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topperstatic __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 760ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper_mm256_broadcastss_ps(__m128 __X) 761ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper{ 762ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper return (__m256)__builtin_ia32_vbroadcastss_ps256((__v4sf)__X); 763ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper} 
764ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper 765ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topperstatic __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) 766ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper_mm256_broadcastsd_pd(__m128d __X) 767ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper{ 768ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper return (__m256d)__builtin_ia32_vbroadcastsd_pd256((__v2df)__X); 769ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper} 770ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper 771ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 7722766deb114cc5d3420027764438cf683dda8a9f0Juergen Ributzka_mm256_broadcastsi128_si256(__m128i __X) 773ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper{ 7742766deb114cc5d3420027764438cf683dda8a9f0Juergen Ributzka return (__m256i)__builtin_ia32_vbroadcastsi256(__X); 775ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper} 776ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper 777ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper#define _mm_blend_epi32(V1, V2, M) __extension__ ({ \ 778ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper __m128i __V1 = (V1); \ 779ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper __m128i __V2 = (V2); \ 7806bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (__m128i)__builtin_shufflevector((__v4si)__V1, (__v4si)__V2, \ 7816bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x01) ? 4 : 0), \ 7826bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x02) ? 5 : 1), \ 7836bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x04) ? 6 : 2), \ 7846bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x08) ? 
7 : 3)); }) 785ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper 786ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper#define _mm256_blend_epi32(V1, V2, M) __extension__ ({ \ 787ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper __m256i __V1 = (V1); \ 788ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper __m256i __V2 = (V2); \ 7896bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (__m256i)__builtin_shufflevector((__v8si)__V1, (__v8si)__V2, \ 7906bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x01) ? 8 : 0), \ 7916bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x02) ? 9 : 1), \ 7926bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x04) ? 10 : 2), \ 7936bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x08) ? 11 : 3), \ 7946bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x10) ? 12 : 4), \ 7956bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x20) ? 13 : 5), \ 7966bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x40) ? 14 : 6), \ 7976bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x80) ? 
15 : 7)); }) 79834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 79934a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 80034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm256_broadcastb_epi8(__m128i __X) 80134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 80234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper return (__m256i)__builtin_ia32_pbroadcastb256((__v16qi)__X); 80334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 80434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 80534a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 80634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm256_broadcastw_epi16(__m128i __X) 80734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 80834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper return (__m256i)__builtin_ia32_pbroadcastw256((__v8hi)__X); 80934a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 81034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 81134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 81234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm256_broadcastd_epi32(__m128i __X) 81334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 81434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper return (__m256i)__builtin_ia32_pbroadcastd256((__v4si)__X); 81534a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 81634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 81734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 81834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm256_broadcastq_epi64(__m128i __X) 81934a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 82034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper return (__m256i)__builtin_ia32_pbroadcastq256(__X); 
82134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 82234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 82334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 82434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm_broadcastb_epi8(__m128i __X) 82534a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 82634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper return (__m128i)__builtin_ia32_pbroadcastb128((__v16qi)__X); 82734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 82834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 82934a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 83034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm_broadcastw_epi16(__m128i __X) 83134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 83234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper return (__m128i)__builtin_ia32_pbroadcastw128((__v8hi)__X); 83334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 83434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 83534a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 83634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 83734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm_broadcastd_epi32(__m128i __X) 83834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 83934a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper return (__m128i)__builtin_ia32_pbroadcastd128((__v4si)__X); 84034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 84134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 84234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 84334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm_broadcastq_epi64(__m128i __X) 84434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 
84534a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper return (__m128i)__builtin_ia32_pbroadcastq128(__X); 84634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 84734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 84834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 8494f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) 85034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 8514f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b); 85234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 85334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 85434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper#define _mm256_permute4x64_pd(V, M) __extension__ ({ \ 85534a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper __m256d __V = (V); \ 856b5491f3d7b688b37745397fafd6c1f10548fd5c2Craig Topper (__m256d)__builtin_shufflevector((__v4df)__V, (__v4df) _mm256_setzero_pd(), \ 857b5491f3d7b688b37745397fafd6c1f10548fd5c2Craig Topper (M) & 0x3, ((M) & 0xc) >> 2, \ 858b5491f3d7b688b37745397fafd6c1f10548fd5c2Craig Topper ((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); }) 85934a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 86034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 8614f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_permutevar8x32_ps(__m256 __a, __m256 __b) 86234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 8634f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8sf)__b); 86434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 86534a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 86634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper#define _mm256_permute4x64_epi64(V, M) __extension__ ({ \ 
86734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper __m256i __V = (V); \ 868b5491f3d7b688b37745397fafd6c1f10548fd5c2Craig Topper (__m256i)__builtin_shufflevector((__v4di)__V, (__v4di) _mm256_setzero_si256(), \ 869b5491f3d7b688b37745397fafd6c1f10548fd5c2Craig Topper (M) & 0x3, ((M) & 0xc) >> 2, \ 870b5491f3d7b688b37745397fafd6c1f10548fd5c2Craig Topper ((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); }) 87134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 87234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper#define _mm256_permute2x128_si256(V1, V2, M) __extension__ ({ \ 87334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper __m256i __V1 = (V1); \ 87434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper __m256i __V2 = (V2); \ 87549a110db4c43835681bb89671f8f73c8d8c7c28cCraig Topper (__m256i)__builtin_ia32_permti256(__V1, __V2, (M)); }) 8765cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 8775cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper#define _mm256_extracti128_si256(A, O) __extension__ ({ \ 8785cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper __m256i __A = (A); \ 8795cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper (__m128i)__builtin_ia32_extract128i256(__A, (O)); }) 8805cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 8815cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper#define _mm256_inserti128_si256(V1, V2, O) __extension__ ({ \ 8825cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper __m256i __V1 = (V1); \ 8835cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper __m128i __V2 = (V2); \ 8845cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper (__m256i)__builtin_ia32_insert128i256(__V1, __V2, (O)); }) 8855cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 8865cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 8875cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_maskload_epi32(int const *__X, __m256i __M) 8885cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 
8895cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m256i)__builtin_ia32_maskloadd256((const __v8si *)__X, (__v8si)__M); 8905cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 8915cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 8925cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 8935cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_maskload_epi64(long long const *__X, __m256i __M) 8945cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 8955cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, __M); 8965cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 8975cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 8985cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 8995cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_maskload_epi32(int const *__X, __m128i __M) 9005cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9015cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m128i)__builtin_ia32_maskloadd((const __v4si *)__X, (__v4si)__M); 9025cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9035cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9045cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 9055cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_maskload_epi64(long long const *__X, __m128i __M) 9065cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9075cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m128i)__builtin_ia32_maskloadq((const __v2di *)__X, (__v2di)__M); 9085cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9095cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9105cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 
9115cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y) 9125cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9135cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper __builtin_ia32_maskstored256((__v8si *)__X, (__v8si)__M, (__v8si)__Y); 9145cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9155cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9165cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 9175cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y) 9185cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9195cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper __builtin_ia32_maskstoreq256((__v4di *)__X, __M, __Y); 9205cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9215cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9225cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 9235cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_maskstore_epi32(int *__X, __m128i __M, __m128i __Y) 9245cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9255cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper __builtin_ia32_maskstored((__v4si *)__X, (__v4si)__M, (__v4si)__Y); 9265cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9275cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9285cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 9295cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y) 9305cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9315cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper __builtin_ia32_maskstoreq(( __v2di *)__X, __M, __Y); 9325cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9335cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 
9345cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 9355cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_sllv_epi32(__m256i __X, __m256i __Y) 9365cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9375cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y); 9385cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9395cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9405cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 9415cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_sllv_epi32(__m128i __X, __m128i __Y) 9425cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9435cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y); 9445cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9455cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9465cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 9475cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_sllv_epi64(__m256i __X, __m256i __Y) 9485cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9495cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m256i)__builtin_ia32_psllv4di(__X, __Y); 9505cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9515cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9525cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 9535cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_sllv_epi64(__m128i __X, __m128i __Y) 9545cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9555cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m128i)__builtin_ia32_psllv2di(__X, __Y); 9565cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 
9575cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9585cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 9595cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_srav_epi32(__m256i __X, __m256i __Y) 9605cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9615cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y); 9625cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9635cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9645cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 9655cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_srav_epi32(__m128i __X, __m128i __Y) 9665cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9675cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y); 9685cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9695cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9705cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 9715cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_srlv_epi32(__m256i __X, __m256i __Y) 9725cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9735cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y); 9745cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9755cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9765cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 9775cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_srlv_epi32(__m128i __X, __m128i __Y) 9785cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9795cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return 
(__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y); 9805cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9815cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9825cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 9835cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_srlv_epi64(__m256i __X, __m256i __Y) 9845cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9855cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m256i)__builtin_ia32_psrlv4di(__X, __Y); 9865cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9875cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9885cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 9895cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_srlv_epi64(__m128i __X, __m128i __Y) 9905cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9915cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m128i)__builtin_ia32_psrlv2di(__X, __Y); 9925cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9935283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren 9945283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren#define _mm_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \ 9955283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128d __a = (a); \ 9965283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren double const *__m = (m); \ 9975283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128i __i = (i); \ 9985283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128d __mask = (mask); \ 9995283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__m128d)__builtin_ia32_gatherd_pd((__v2df)__a, (const __v2df *)__m, \ 10005283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__v4si)__i, (__v2df)__mask, (s)); }) 10015283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren 10025283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren#define _mm256_mask_i32gather_pd(a, m, i, mask, s) 
__extension__ ({ \ 10035283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m256d __a = (a); \ 10045283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren double const *__m = (m); \ 1005c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren __m128i __i = (i); \ 10065283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m256d __mask = (mask); \ 10075283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__m256d)__builtin_ia32_gatherd_pd256((__v4df)__a, (const __v4df *)__m, \ 1008c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren (__v4si)__i, (__v4df)__mask, (s)); }) 10095283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren 10105283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren#define _mm_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \ 10115283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128d __a = (a); \ 10125283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren double const *__m = (m); \ 10135283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128i __i = (i); \ 10145283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128d __mask = (mask); \ 10155283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__m128d)__builtin_ia32_gatherq_pd((__v2df)__a, (const __v2df *)__m, \ 10165283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__v2di)__i, (__v2df)__mask, (s)); }) 10175283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren 10185283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren#define _mm256_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \ 10195283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m256d __a = (a); \ 10205283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren double const *__m = (m); \ 10215283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m256i __i = (i); \ 10225283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m256d __mask = (mask); \ 10235283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__m256d)__builtin_ia32_gatherq_pd256((__v4df)__a, (const __v4df *)__m, \ 10245283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__v4di)__i, (__v4df)__mask, (s)); }) 
/* Masked gathers of float, 32-bit integer and 64-bit integer elements.
 *
 * Each macro evaluates a, m, i and mask exactly once into typed temporaries
 * and forwards them, in that order and followed by s, to the matching
 * __builtin_ia32_gather* builtin.  s is passed through as written rather
 * than captured in a temporary.
 * NOTE(review): presumably s must be a compile-time constant scale --
 * confirm against the builtin's requirements. */

/* __m128 source/mask, __m128i index (cast to __v4si); returns __m128. */
#define _mm_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \
  __m128 __a = (a); \
  float const *__m = (m); \
  __m128i __i = (i); \
  __m128 __mask = (mask); \
  (__m128)__builtin_ia32_gatherd_ps( \
      (__v4sf)__a, \
      (const __v4sf *)__m, \
      (__v4si)__i, \
      (__v4sf)__mask, \
      (s)); })

/* __m256 source/mask, __m256i index (cast to __v8si); returns __m256. */
#define _mm256_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \
  __m256 __a = (a); \
  float const *__m = (m); \
  __m256i __i = (i); \
  __m256 __mask = (mask); \
  (__m256)__builtin_ia32_gatherd_ps256( \
      (__v8sf)__a, \
      (const __v8sf *)__m, \
      (__v8si)__i, \
      (__v8sf)__mask, \
      (s)); })

/* __m128 source/mask, __m128i index (cast to __v2di); returns __m128. */
#define _mm_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \
  __m128 __a = (a); \
  float const *__m = (m); \
  __m128i __i = (i); \
  __m128 __mask = (mask); \
  (__m128)__builtin_ia32_gatherq_ps( \
      (__v4sf)__a, \
      (const __v4sf *)__m, \
      (__v2di)__i, \
      (__v4sf)__mask, \
      (s)); })

/* __m128 source/mask with a __m256i index (cast to __v4di); the result is
 * a __m128 even though the index vector is 256 bits wide. */
#define _mm256_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \
  __m128 __a = (a); \
  float const *__m = (m); \
  __m256i __i = (i); \
  __m128 __mask = (mask); \
  (__m128)__builtin_ia32_gatherq_ps256( \
      (__v4sf)__a, \
      (const __v4sf *)__m, \
      (__v4di)__i, \
      (__v4sf)__mask, \
      (s)); })

/* __m128i source/mask/index, all cast to __v4si; returns __m128i. */
#define _mm_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \
  __m128i __a = (a); \
  int const *__m = (m); \
  __m128i __i = (i); \
  __m128i __mask = (mask); \
  (__m128i)__builtin_ia32_gatherd_d( \
      (__v4si)__a, \
      (const __v4si *)__m, \
      (__v4si)__i, \
      (__v4si)__mask, \
      (s)); })

/* __m256i source/mask/index, all cast to __v8si; returns __m256i. */
#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \
  __m256i __a = (a); \
  int const *__m = (m); \
  __m256i __i = (i); \
  __m256i __mask = (mask); \
  (__m256i)__builtin_ia32_gatherd_d256( \
      (__v8si)__a, \
      (const __v8si *)__m, \
      (__v8si)__i, \
      (__v8si)__mask, \
      (s)); })

/* __m128i source/mask (cast to __v4si), __m128i index (cast to __v2di);
 * returns __m128i. */
#define _mm_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \
  __m128i __a = (a); \
  int const *__m = (m); \
  __m128i __i = (i); \
  __m128i __mask = (mask); \
  (__m128i)__builtin_ia32_gatherq_d( \
      (__v4si)__a, \
      (const __v4si *)__m, \
      (__v2di)__i, \
      (__v4si)__mask, \
      (s)); })

/* __m128i source/mask (cast to __v4si) with a __m256i index (cast to
 * __v4di); the result is a __m128i even though the index vector is 256
 * bits wide. */
#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \
  __m128i __a = (a); \
  int const *__m = (m); \
  __m256i __i = (i); \
  __m128i __mask = (mask); \
  (__m128i)__builtin_ia32_gatherq_d256( \
      (__v4si)__a, \
      (const __v4si *)__m, \
      (__v4di)__i, \
      (__v4si)__mask, \
      (s)); })

/* __m128i source/mask (cast to __v2di), __m128i index (cast to __v4si);
 * returns __m128i. */
#define _mm_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \
  __m128i __a = (a); \
  long long const *__m = (m); \
  __m128i __i = (i); \
  __m128i __mask = (mask); \
  (__m128i)__builtin_ia32_gatherd_q( \
      (__v2di)__a, \
      (const __v2di *)__m, \
      (__v4si)__i, \
      (__v2di)__mask, \
      (s)); })

/* __m256i source/mask (cast to __v4di), __m128i index (cast to __v4si);
 * returns __m256i. */
#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \
  __m256i __a = (a); \
  long long const *__m = (m); \
  __m128i __i = (i); \
  __m256i __mask = (mask); \
  (__m256i)__builtin_ia32_gatherd_q256( \
      (__v4di)__a, \
      (const __v4di *)__m, \
      (__v4si)__i, \
      (__v4di)__mask, \
      (s)); })

/* __m128i source/mask/index, all cast to __v2di; returns __m128i. */
#define _mm_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \
  __m128i __a = (a); \
  long long const *__m = (m); \
  __m128i __i = (i); \
  __m128i __mask = (mask); \
  (__m128i)__builtin_ia32_gatherq_q( \
      (__v2di)__a, \
      (const __v2di *)__m, \
      (__v2di)__i, \
      (__v2di)__mask, \
      (s)); })
/* Masked gather: evaluates a, m, i and mask once each into typed
 * temporaries and forwards them (all vectors cast to __v4di), followed by
 * s, to __builtin_ia32_gatherq_q256; returns __m256i. */
#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \
  __m256i __a = (a); \
  long long const *__m = (m); \
  __m256i __i = (i); \
  __m256i __mask = (mask); \
  (__m256i)__builtin_ia32_gatherq_q256( \
      (__v4di)__a, \
      (const __v4di *)__m, \
      (__v4di)__i, \
      (__v4di)__mask, \
      (s)); })

/* Unmasked gather built on the masked builtin __builtin_ia32_gatherd_pd.
 * m and i are evaluated once each.  The source operand is a zero vector and
 * the mask operand is a vector with every element set to -1.0.
 * NOTE(review): presumably the builtin only inspects the sign bit of each
 * mask element, which -1.0 sets, so every element is gathered -- confirm
 * against the instruction reference. */
#define _mm_i32gather_pd(m, i, s) __extension__ ({ \
  double const *__m = (m); \
  __m128i __i = (i); \
  (__m128d)__builtin_ia32_gatherd_pd( \
      (__v2df)_mm_setzero_pd(), \
      (const __v2df *)__m, \
      (__v4si)__i, \
      (__v2df)_mm_set1_pd((double)(long long int)-1), \
      (s)); })

/* Unmasked gather built on the masked builtin __builtin_ia32_gatherd_pd256.
 * m and i are evaluated once each.  The source operand is a zero vector and
 * the mask operand is a vector with every element set to -1.0.
 * NOTE(review): presumably the builtin only inspects the sign bit of each
 * mask element, which -1.0 sets, so every element is gathered -- confirm
 * against the instruction reference. */
#define _mm256_i32gather_pd(m, i, s) __extension__ ({ \
  double const *__m = (m); \
  __m128i __i = (i); \
  (__m256d)__builtin_ia32_gatherd_pd256( \
      (__v4df)_mm256_setzero_pd(), \
      (const __v4df *)__m, \
      (__v4si)__i, \
      (__v4df)_mm256_set1_pd((double)(long long int)-1), \
      (s)); })
/* Unmasked double-precision gathers using the "gatherq" builtins.
 *
 * Each macro evaluates m and then i exactly once into typed temporaries
 * (m must convert to 'double const *'; i to __m128i for the 128-bit form and
 * to __m256i for the 256-bit form).  The builtin receives a zero vector as
 * its first operand and a mask built from (double)(long long int)-1, i.e.
 * the value -1.0, whose sign bit is set in every element.  s is forwarded
 * unchanged as the last argument.
 */
#define _mm_i64gather_pd(m, i, s) __extension__ ({                \
  double const *__addr = (m);                                     \
  __m128i __idx = (i);                                            \
  (__m128d)__builtin_ia32_gatherq_pd(                             \
      (__v2df)_mm_setzero_pd(),                                   \
      (const __v2df *)__addr,                                     \
      (__v2di)__idx,                                              \
      (__v2df)_mm_set1_pd((double)(long long int)-1),             \
      (s)); })

#define _mm256_i64gather_pd(m, i, s) __extension__ ({             \
  double const *__addr = (m);                                     \
  __m256i __idx = (i);                                            \
  (__m256d)__builtin_ia32_gatherq_pd256(                          \
      (__v4df)_mm256_setzero_pd(),                                \
      (const __v4df *)__addr,                                     \
      (__v4di)__idx,                                              \
      (__v4df)_mm256_set1_pd((double)(long long int)-1),          \
      (s)); })

/* _mm_i32gather_ps(m, i, s)
 *
 * Single-precision counterpart built on __builtin_ia32_gatherd_ps: m must
 * convert to 'float const *' and i to __m128i; each is evaluated once.  The
 * first operand is a zero vector and the mask is set1 of (float)(int)-1,
 * i.e. -1.0f in every element.  The result is cast to __m128.
 */
#define _mm_i32gather_ps(m, i, s) __extension__ ({                \
  float const *__addr = (m);                                      \
  __m128i __idx = (i);                                            \
  (__m128)__builtin_ia32_gatherd_ps(                              \
      (__v4sf)_mm_setzero_ps(),                                   \
      (const __v4sf *)__addr,                                     \
      (__v4si)__idx,                                              \
      (__v4sf)_mm_set1_ps((float)(int)-1),                        \
      (s)); })
115756c045ed5e148d3eff9b344001370b80ec14d43bManman Ren#define _mm256_i32gather_ps(m, i, s) __extension__ ({ \ 115856c045ed5e148d3eff9b344001370b80ec14d43bManman Ren float const *__m = (m); \ 115956c045ed5e148d3eff9b344001370b80ec14d43bManman Ren __m256i __i = (i); \ 116056c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (__m256)__builtin_ia32_gatherd_ps256((__v8sf)_mm256_setzero_ps(), \ 116156c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (const __v8sf *)__m, (__v8si)__i, \ 116256c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (__v8sf)_mm256_set1_ps((float)(int)-1), (s)); }) 116356c045ed5e148d3eff9b344001370b80ec14d43bManman Ren 116456c045ed5e148d3eff9b344001370b80ec14d43bManman Ren#define _mm_i64gather_ps(m, i, s) __extension__ ({ \ 116556c045ed5e148d3eff9b344001370b80ec14d43bManman Ren float const *__m = (m); \ 116656c045ed5e148d3eff9b344001370b80ec14d43bManman Ren __m128i __i = (i); \ 116756c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (__m128)__builtin_ia32_gatherq_ps((__v4sf)_mm_setzero_ps(), \ 116856c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (const __v4sf *)__m, (__v2di)__i, \ 116956c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (__v4sf)_mm_set1_ps((float)(int)-1), (s)); }) 117056c045ed5e148d3eff9b344001370b80ec14d43bManman Ren 117156c045ed5e148d3eff9b344001370b80ec14d43bManman Ren#define _mm256_i64gather_ps(m, i, s) __extension__ ({ \ 117256c045ed5e148d3eff9b344001370b80ec14d43bManman Ren float const *__m = (m); \ 117356c045ed5e148d3eff9b344001370b80ec14d43bManman Ren __m256i __i = (i); \ 117456c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_setzero_ps(), \ 117556c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (const __v4sf *)__m, (__v4di)__i, \ 117656c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (__v4sf)_mm_set1_ps((float)(int)-1), (s)); }) 117756c045ed5e148d3eff9b344001370b80ec14d43bManman Ren 117856c045ed5e148d3eff9b344001370b80ec14d43bManman Ren#define _mm_i32gather_epi32(m, i, s) 
__extension__ ({ \ 117956c045ed5e148d3eff9b344001370b80ec14d43bManman Ren int const *__m = (m); \ 118056c045ed5e148d3eff9b344001370b80ec14d43bManman Ren __m128i __i = (i); \ 118156c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_setzero_si128(), \ 118256c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (const __v4si *)__m, (__v4si)__i, \ 118356c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (__v4si)_mm_set1_epi32(-1), (s)); }) 118456c045ed5e148d3eff9b344001370b80ec14d43bManman Ren 118556c045ed5e148d3eff9b344001370b80ec14d43bManman Ren#define _mm256_i32gather_epi32(m, i, s) __extension__ ({ \ 118656c045ed5e148d3eff9b344001370b80ec14d43bManman Ren int const *__m = (m); \ 118756c045ed5e148d3eff9b344001370b80ec14d43bManman Ren __m256i __i = (i); \ 118856c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_setzero_si256(), \ 118956c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (const __v8si *)__m, (__v8si)__i, \ 119056c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (__v8si)_mm256_set1_epi32(-1), (s)); }) 119156c045ed5e148d3eff9b344001370b80ec14d43bManman Ren 119256c045ed5e148d3eff9b344001370b80ec14d43bManman Ren#define _mm_i64gather_epi32(m, i, s) __extension__ ({ \ 119356c045ed5e148d3eff9b344001370b80ec14d43bManman Ren int const *__m = (m); \ 119456c045ed5e148d3eff9b344001370b80ec14d43bManman Ren __m128i __i = (i); \ 119556c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_setzero_si128(), \ 119656c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (const __v4si *)__m, (__v2di)__i, \ 119756c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (__v4si)_mm_set1_epi32(-1), (s)); }) 119856c045ed5e148d3eff9b344001370b80ec14d43bManman Ren 119956c045ed5e148d3eff9b344001370b80ec14d43bManman Ren#define _mm256_i64gather_epi32(m, i, s) __extension__ ({ \ 120056c045ed5e148d3eff9b344001370b80ec14d43bManman Ren int const *__m = (m); \ 
120156c045ed5e148d3eff9b344001370b80ec14d43bManman Ren __m256i __i = (i); \ 120256c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (__m128i)__builtin_ia32_gatherq_d256((__v4si)_mm_setzero_si128(), \ 120356c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (const __v4si *)__m, (__v4di)__i, \ 120456c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (__v4si)_mm_set1_epi32(-1), (s)); }) 120556c045ed5e148d3eff9b344001370b80ec14d43bManman Ren 120656c045ed5e148d3eff9b344001370b80ec14d43bManman Ren#define _mm_i32gather_epi64(m, i, s) __extension__ ({ \ 12079b2caf7e4a8fe26bf1abd0bf1bf223209be13c2fEli Friedman long long const *__m = (m); \ 120856c045ed5e148d3eff9b344001370b80ec14d43bManman Ren __m128i __i = (i); \ 120956c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_setzero_si128(), \ 121056c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (const __v2di *)__m, (__v4si)__i, \ 121156c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (__v2di)_mm_set1_epi64x(-1), (s)); }) 121256c045ed5e148d3eff9b344001370b80ec14d43bManman Ren 121356c045ed5e148d3eff9b344001370b80ec14d43bManman Ren#define _mm256_i32gather_epi64(m, i, s) __extension__ ({ \ 12149b2caf7e4a8fe26bf1abd0bf1bf223209be13c2fEli Friedman long long const *__m = (m); \ 121556c045ed5e148d3eff9b344001370b80ec14d43bManman Ren __m128i __i = (i); \ 121656c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_setzero_si256(), \ 121756c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (const __v4di *)__m, (__v4si)__i, \ 121856c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (__v4di)_mm256_set1_epi64x(-1), (s)); }) 121956c045ed5e148d3eff9b344001370b80ec14d43bManman Ren 122056c045ed5e148d3eff9b344001370b80ec14d43bManman Ren#define _mm_i64gather_epi64(m, i, s) __extension__ ({ \ 12219b2caf7e4a8fe26bf1abd0bf1bf223209be13c2fEli Friedman long long const *__m = (m); \ 122256c045ed5e148d3eff9b344001370b80ec14d43bManman Ren __m128i __i = (i); \ 
122356c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_setzero_si128(), \ 122456c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (const __v2di *)__m, (__v2di)__i, \ 122556c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (__v2di)_mm_set1_epi64x(-1), (s)); }) 122656c045ed5e148d3eff9b344001370b80ec14d43bManman Ren 122756c045ed5e148d3eff9b344001370b80ec14d43bManman Ren#define _mm256_i64gather_epi64(m, i, s) __extension__ ({ \ 12289b2caf7e4a8fe26bf1abd0bf1bf223209be13c2fEli Friedman long long const *__m = (m); \ 122956c045ed5e148d3eff9b344001370b80ec14d43bManman Ren __m256i __i = (i); \ 123056c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_setzero_si256(), \ 123156c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (const __v4di *)__m, (__v4di)__i, \ 123256c045ed5e148d3eff9b344001370b80ec14d43bManman Ren (__v4di)_mm256_set1_epi64x(-1), (s)); }) 12337cb4fae8da8f541f43d39896c989b06c69fd7821Richard Smith 12347cb4fae8da8f541f43d39896c989b06c69fd7821Richard Smith#endif /* __AVX2INTRIN_H */ 1235