1925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper/*===---- avx2intrin.h - AVX2 intrinsics -----------------------------------=== 2925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * 3925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * Permission is hereby granted, free of charge, to any person obtaining a copy 4925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * of this software and associated documentation files (the "Software"), to deal 5925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * in the Software without restriction, including without limitation the rights 6925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * copies of the Software, and to permit persons to whom the Software is 8925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * furnished to do so, subject to the following conditions: 9925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * 10925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * The above copyright notice and this permission notice shall be included in 11925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * all copies or substantial portions of the Software. 12925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * 13925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * THE SOFTWARE. 20925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * 21925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper *===-----------------------------------------------------------------------=== 22925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper */ 23925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 24925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper#ifndef __IMMINTRIN_H 25925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper#error "Never use <avx2intrin.h> directly; include <immintrin.h> instead." 26925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper#endif 27925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 28925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper/* SSE4 Multiple Packed Sums of Absolute Difference. 
*/ 29925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper#define _mm256_mpsadbw_epu8(X, Y, M) __builtin_ia32_mpsadbw256((X), (Y), (M)) 30925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 31925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 32925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_abs_epi8(__m256i a) 33925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 34925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)__builtin_ia32_pabsb256((__v32qi)a); 35925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 36925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 37925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 38925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_abs_epi16(__m256i a) 39925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 40925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)__builtin_ia32_pabsw256((__v16hi)a); 41925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 42925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 43925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 44925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_abs_epi32(__m256i a) 45925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 46925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)__builtin_ia32_pabsd256((__v8si)a); 47925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 48925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 49925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 50925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_packs_epi16(__m256i a, __m256i b) 51925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 52925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return 
(__m256i)__builtin_ia32_packsswb256((__v16hi)a, (__v16hi)b); 53925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 54925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 55925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 56925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_packs_epi32(__m256i a, __m256i b) 57925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 58925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)__builtin_ia32_packssdw256((__v8si)a, (__v8si)b); 59925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 60925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 61925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 62925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_packus_epi16(__m256i a, __m256i b) 63925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 64925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)__builtin_ia32_packuswb256((__v16hi)a, (__v16hi)b); 65925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 66925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 67925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 68925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_packus_epi32(__m256i __V1, __m256i __V2) 69925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 70925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2); 71925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 72925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 73925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 74925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_add_epi8(__m256i a, __m256i b) 75925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 
76925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)((__v32qi)a + (__v32qi)b); 77925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 78925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 79925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 80925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_add_epi16(__m256i a, __m256i b) 81925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 82925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)((__v16hi)a + (__v16hi)b); 83925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 84925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 85925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 86925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_add_epi32(__m256i a, __m256i b) 87925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 88925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)((__v8si)a + (__v8si)b); 89925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 90925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 91925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 92925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_add_epi64(__m256i a, __m256i b) 93925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 94925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return a + b; 95925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 96925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 97925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 989c2ffd803af03f1728423d0d73ff87d988642633Craig Topper_mm256_adds_epi8(__m256i a, __m256i b) 999c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 1009c2ffd803af03f1728423d0d73ff87d988642633Craig Topper return 
(__m256i)__builtin_ia32_paddsb256((__v32qi)a, (__v32qi)b); 1019c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 1029c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 1039c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1049c2ffd803af03f1728423d0d73ff87d988642633Craig Topper_mm256_adds_epi16(__m256i a, __m256i b) 1059c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 1069c2ffd803af03f1728423d0d73ff87d988642633Craig Topper return (__m256i)__builtin_ia32_paddsw256((__v16hi)a, (__v16hi)b); 1079c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 1089c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 1099c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1109c2ffd803af03f1728423d0d73ff87d988642633Craig Topper_mm256_adds_epu8(__m256i a, __m256i b) 1119c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 1129c2ffd803af03f1728423d0d73ff87d988642633Craig Topper return (__m256i)__builtin_ia32_paddusb256((__v32qi)a, (__v32qi)b); 1139c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 1149c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 1159c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1169c2ffd803af03f1728423d0d73ff87d988642633Craig Topper_mm256_adds_epu16(__m256i a, __m256i b) 1179c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 1189c2ffd803af03f1728423d0d73ff87d988642633Craig Topper return (__m256i)__builtin_ia32_paddusw256((__v16hi)a, (__v16hi)b); 1199c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 1209c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 1219c2ffd803af03f1728423d0d73ff87d988642633Craig Topper#define _mm256_alignr_epi8(a, b, n) __extension__ ({ \ 1229c2ffd803af03f1728423d0d73ff87d988642633Craig Topper __m256i __a = (a); \ 1239c2ffd803af03f1728423d0d73ff87d988642633Craig Topper __m256i __b = (b); \ 
1249c2ffd803af03f1728423d0d73ff87d988642633Craig Topper (__m256i)__builtin_ia32_palignr256((__v32qi)__a, (__v32qi)__b, (n)); }) 1259c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 1269c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 127735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper_mm256_and_si256(__m256i a, __m256i b) 128735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper{ 129735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper return a & b; 130735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper} 131735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper 132735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 133735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper_mm256_andnot_si256(__m256i a, __m256i b) 134735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper{ 135735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper return ~a & b; 136735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper} 137735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper 138735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1394c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_avg_epu8(__m256i a, __m256i b) 1404c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1414c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper return (__m256i)__builtin_ia32_pavgb256((__v32qi)a, (__v32qi)b); 1424c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1434c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1444c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1454c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_avg_epu16(__m256i a, __m256i b) 1464c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1474c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper return 
(__m256i)__builtin_ia32_pavgw256((__v16hi)a, (__v16hi)b); 1484c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1494c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1504c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1514c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M) 1524c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1534c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2, 1544c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper (__v32qi)__M); 1554c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1564c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1574c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper#define _mm256_blend_epi16(V1, V2, M) __extension__ ({ \ 1584c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper __m256i __V1 = (V1); \ 1594c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper __m256i __V2 = (V2); \ 1605aeaca3fa755cddba583842e7a0c3e168bf71b4dCraig Topper (__m256i)__builtin_ia32_pblendw256((__v16hi)__V1, (__v16hi)__V2, (M)); }) 1614c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1624c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1634c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_cmpeq_epi8(__m256i a, __m256i b) 1644c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1654c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper return (__m256i)((__v32qi)a == (__v32qi)b); 1664c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1674c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1684c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1694c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_cmpeq_epi16(__m256i a, __m256i b) 
1704c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1714c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper return (__m256i)((__v16hi)a == (__v16hi)b); 1724c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1734c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1744c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1754c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_cmpeq_epi32(__m256i a, __m256i b) 1764c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1774c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper return (__m256i)((__v8si)a == (__v8si)b); 1784c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1794c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1804c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1814c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_cmpeq_epi64(__m256i a, __m256i b) 1824c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1835cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m256i)(a == b); 1844c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1854c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1864c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1874c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_cmpgt_epi8(__m256i a, __m256i b) 1884c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1894c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper return (__m256i)((__v32qi)a > (__v32qi)b); 1904c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1914c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1924c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1934c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_cmpgt_epi16(__m256i a, __m256i b) 
1944c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1954c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper return (__m256i)((__v16hi)a > (__v16hi)b); 1964c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1974c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1984c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1994c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_cmpgt_epi32(__m256i a, __m256i b) 2004c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 2014c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper return (__m256i)((__v8si)a > (__v8si)b); 2024c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 2034c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 2044c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2054c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_cmpgt_epi64(__m256i a, __m256i b) 2064c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 2075cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m256i)(a > b); 2084c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 2094c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 2104c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 211318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper_mm256_hadd_epi16(__m256i a, __m256i b) 212318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{ 213318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper return (__m256i)__builtin_ia32_phaddw256((__v16hi)a, (__v16hi)b); 214318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper} 215318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper 216318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 217318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper_mm256_hadd_epi32(__m256i a, __m256i b) 
218318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{ 219318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper return (__m256i)__builtin_ia32_phaddd256((__v8si)a, (__v8si)b); 220318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper} 221318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper 222318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 223318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper_mm256_hadds_epi16(__m256i a, __m256i b) 224318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{ 225318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper return (__m256i)__builtin_ia32_phaddsw256((__v16hi)a, (__v16hi)b); 226318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper} 227318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper 228318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 229318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper_mm256_hsub_epi16(__m256i a, __m256i b) 230318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{ 231318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper return (__m256i)__builtin_ia32_phsubw256((__v16hi)a, (__v16hi)b); 232318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper} 233318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper 234318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 235318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper_mm256_hsub_epi32(__m256i a, __m256i b) 236318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{ 237318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper return (__m256i)__builtin_ia32_phsubd256((__v8si)a, (__v8si)b); 238318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper} 239318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper 240318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 
241318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper_mm256_hsubs_epi16(__m256i a, __m256i b) 242318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{ 243318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper return (__m256i)__builtin_ia32_phsubsw256((__v16hi)a, (__v16hi)b); 244318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper} 245318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper 246318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2474a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper_mm256_maddubs_epi16(__m256i a, __m256i b) 2484a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper{ 2494a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)a, (__v32qi)b); 2504a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper} 2514a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper 2524a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2534a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper_mm256_madd_epi16(__m256i a, __m256i b) 2544a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper{ 2554a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)a, (__v16hi)b); 2564a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper} 2574a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper 2584a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 259231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_max_epi8(__m256i a, __m256i b) 260231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 261231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)a, (__v32qi)b); 262231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 263231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 264231f793326e3a3ad6e07949adb776f45c07f0f7bCraig 
Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 265231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_max_epi16(__m256i a, __m256i b) 266231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 267231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)a, (__v16hi)b); 268231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 269231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 270231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 271231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_max_epi32(__m256i a, __m256i b) 272231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 273231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmaxsd256((__v8si)a, (__v8si)b); 274231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 275231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 276231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 277231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_max_epu8(__m256i a, __m256i b) 278231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 279231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmaxub256((__v32qi)a, (__v32qi)b); 280231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 281231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 282231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 283231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_max_epu16(__m256i a, __m256i b) 284231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 285231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)a, (__v16hi)b); 286231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 
287231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 288231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 289231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_max_epu32(__m256i a, __m256i b) 290231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 291231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmaxud256((__v8si)a, (__v8si)b); 292231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 293231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 294231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 295231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_min_epi8(__m256i a, __m256i b) 296231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 297231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pminsb256((__v32qi)a, (__v32qi)b); 298231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 299231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 300231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 301231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_min_epi16(__m256i a, __m256i b) 302231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 303231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pminsw256((__v16hi)a, (__v16hi)b); 304231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 305231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 306231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 307231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_min_epi32(__m256i a, __m256i b) 308231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 309231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pminsd256((__v8si)a, (__v8si)b); 
310231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 311231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 312231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 313231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_min_epu8(__m256i a, __m256i b) 314231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 315231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pminub256((__v32qi)a, (__v32qi)b); 316231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 317231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 318231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 319231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_min_epu16(__m256i a, __m256i b) 320231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 321231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)a, (__v16hi)b); 322231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 323231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 324231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 325231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_min_epu32(__m256i a, __m256i b) 326231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 327231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pminud256((__v8si)a, (__v8si)b); 328231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 329231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 330231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 331231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_movemask_epi8(__m256i a) 332231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 333231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return 
__builtin_ia32_pmovmskb256((__v32qi)a); 334231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 335231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 336231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 337231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi8_epi16(__m128i __V) 338231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 339231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovsxbw256((__v16qi)__V); 340231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 341231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 342231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 343231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi8_epi32(__m128i __V) 344231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 345231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovsxbd256((__v16qi)__V); 346231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 347231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 348231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 349231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi8_epi64(__m128i __V) 350231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 351231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovsxbq256((__v16qi)__V); 352231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 353231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 354231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 355231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi16_epi32(__m128i __V) 356231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 357231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return 
(__m256i)__builtin_ia32_pmovsxwd256((__v8hi)__V); 358231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 359231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 360231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 361231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi16_epi64(__m128i __V) 362231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 363231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovsxwq256((__v8hi)__V); 364231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 365231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 366231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 367231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi32_epi64(__m128i __V) 368231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 369231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovsxdq256((__v4si)__V); 370231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 371231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 372231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 373231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu8_epi16(__m128i __V) 374231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 375231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovzxbw256((__v16qi)__V); 376231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 377231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 378231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 379231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu8_epi32(__m128i __V) 380231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 381231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 
return (__m256i)__builtin_ia32_pmovzxbd256((__v16qi)__V); 382231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 383231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 384231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 385231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu8_epi64(__m128i __V) 386231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 387231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovzxbq256((__v16qi)__V); 388231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 389231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 390231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 391231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu16_epi32(__m128i __V) 392231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 393231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovzxwd256((__v8hi)__V); 394231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 395231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 396231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 397231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu16_epi64(__m128i __V) 398231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 399231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovzxwq256((__v8hi)__V); 400231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 401231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 402231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 403231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu32_epi64(__m128i __V) 404231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 
405231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovzxdq256((__v4si)__V); 406231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 407231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 40828a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 40928a324a30b0677309a4c5d73ef5197398265e129Craig Topper_mm256_mul_epi32(__m256i a, __m256i b) 41028a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 41128a324a30b0677309a4c5d73ef5197398265e129Craig Topper return (__m256i)__builtin_ia32_pmuldq256((__v8si)a, (__v8si)b); 41228a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 41328a324a30b0677309a4c5d73ef5197398265e129Craig Topper 41428a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 41528a324a30b0677309a4c5d73ef5197398265e129Craig Topper_mm256_mulhrs_epi16(__m256i a, __m256i b) 41628a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 41728a324a30b0677309a4c5d73ef5197398265e129Craig Topper return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)a, (__v16hi)b); 41828a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 41928a324a30b0677309a4c5d73ef5197398265e129Craig Topper 42028a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 42128a324a30b0677309a4c5d73ef5197398265e129Craig Topper_mm256_mulhi_epu16(__m256i a, __m256i b) 42228a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 42328a324a30b0677309a4c5d73ef5197398265e129Craig Topper return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)a, (__v16hi)b); 42428a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 42528a324a30b0677309a4c5d73ef5197398265e129Craig Topper 42628a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 42728a324a30b0677309a4c5d73ef5197398265e129Craig Topper_mm256_mulhi_epi16(__m256i a, __m256i 
b) 42828a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 42928a324a30b0677309a4c5d73ef5197398265e129Craig Topper return (__m256i)__builtin_ia32_pmulhw256((__v16hi)a, (__v16hi)b); 43028a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 43128a324a30b0677309a4c5d73ef5197398265e129Craig Topper 43228a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 43328a324a30b0677309a4c5d73ef5197398265e129Craig Topper_mm256_mullo_epi16(__m256i a, __m256i b) 43428a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 43528a324a30b0677309a4c5d73ef5197398265e129Craig Topper return (__m256i)((__v16hi)a * (__v16hi)b); 43628a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 43728a324a30b0677309a4c5d73ef5197398265e129Craig Topper 43828a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 43928a324a30b0677309a4c5d73ef5197398265e129Craig Topper_mm256_mullo_epi32 (__m256i a, __m256i b) 44028a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 44128a324a30b0677309a4c5d73ef5197398265e129Craig Topper return (__m256i)((__v8si)a * (__v8si)b); 44228a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 44328a324a30b0677309a4c5d73ef5197398265e129Craig Topper 44428a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 44528a324a30b0677309a4c5d73ef5197398265e129Craig Topper_mm256_mul_epu32(__m256i a, __m256i b) 44628a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 44728a324a30b0677309a4c5d73ef5197398265e129Craig Topper return __builtin_ia32_pmuludq256((__v8si)a, (__v8si)b); 44828a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 44928a324a30b0677309a4c5d73ef5197398265e129Craig Topper 450231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 451735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper_mm256_or_si256(__m256i a, 
__m256i b) 452735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper{ 453735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper return a | b; 454735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper} 455735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper 456735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 457cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sad_epu8(__m256i a, __m256i b) 458cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 459cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return __builtin_ia32_psadbw256((__v32qi)a, (__v32qi)b); 460cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 461cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 462cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 463cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_shuffle_epi8(__m256i a, __m256i b) 464cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 465cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_pshufb256((__v32qi)a, (__v32qi)b); 466cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 467cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 468cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper#define _mm256_shuffle_epi32(a, imm) __extension__ ({ \ 469cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper __m256i __a = (a); \ 470cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)_mm256_set1_epi32(0), \ 471cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (imm) & 0x3, ((imm) & 0xc) >> 2, \ 472cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \ 473cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0x03) >> 0), \ 474cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0x0c) >> 2), \ 
475cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0x30) >> 4), \ 476cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0xc0) >> 6)); }) 477cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 478cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper#define _mm256_shufflehi_epi16(a, imm) __extension__ ({ \ 479cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper __m256i __a = (a); \ 480cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)_mm256_set1_epi16(0), \ 481cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 0, 1, 2, 3, \ 482cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0x03) >> 0), \ 483cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0x0c) >> 2), \ 484cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0x30) >> 4), \ 485cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0xc0) >> 6), \ 486cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 8, 9, 10, 11, \ 487cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 12 + (((imm) & 0x03) >> 0), \ 488cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 12 + (((imm) & 0x0c) >> 2), \ 489cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 12 + (((imm) & 0x30) >> 4), \ 490cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 12 + (((imm) & 0xc0) >> 6)); }) 491cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 492cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper#define _mm256_shufflelo_epi16(a, imm) __extension__ ({ \ 493cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper __m256i __a = (a); \ 494cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)_mm256_set1_epi16(0), \ 495cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (imm) & 0x3,((imm) & 0xc) >> 2, \ 496cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \ 
497cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4, 5, 6, 7, \ 498cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 8 + (((imm) & 0x03) >> 0), \ 499cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 8 + (((imm) & 0x0c) >> 2), \ 500cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 8 + (((imm) & 0x30) >> 4), \ 501cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 8 + (((imm) & 0xc0) >> 6), \ 502cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 12, 13, 14, 15); }) 503cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 504cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 505cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sign_epi8(__m256i a, __m256i b) 506cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 507cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_psignb256((__v32qi)a, (__v32qi)b); 508cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 509cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 510cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 511cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sign_epi16(__m256i a, __m256i b) 512cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 513cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_psignw256((__v16hi)a, (__v16hi)b); 514cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 515cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 516cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 517cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sign_epi32(__m256i a, __m256i b) 518cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 519cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_psignd256((__v8si)a, (__v8si)b); 
520cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 521cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 522cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper#define _mm256_slli_si256(a, count) __extension__ ({ \ 523cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper __m256i __a = (a); \ 524cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (__m256i)__builtin_ia32_pslldqi256(__a, (count)*8); }) 525cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 526cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 527cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_slli_epi16(__m256i a, int count) 528cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 529cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_psllwi256((__v16hi)a, count); 530cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 531cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 532cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 533cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sll_epi16(__m256i a, __m128i count) 534cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 535cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_psllw256((__v16hi)a, (__v8hi)count); 536cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 537cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 538cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 539cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_slli_epi32(__m256i a, int count) 540cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 541cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_pslldi256((__v8si)a, count); 542cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 
543cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 544cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 545cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sll_epi32(__m256i a, __m128i count) 546cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 547cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_pslld256((__v8si)a, (__v4si)count); 548cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 549cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 550cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 551cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_slli_epi64(__m256i a, int count) 552cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 553cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return __builtin_ia32_psllqi256(a, count); 554cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 555cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 556cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 557cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sll_epi64(__m256i a, __m128i count) 558cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 559cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return __builtin_ia32_psllq256(a, count); 560cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 561cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 562cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 563cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_srai_epi16(__m256i a, int count) 564cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 565cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_psrawi256((__v16hi)a, count); 
566cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 567cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 568cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 569cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sra_epi16(__m256i a, __m128i count) 570cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 571cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_psraw256((__v16hi)a, (__v8hi)count); 572cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 573cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 574cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 575cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_srai_epi32(__m256i a, int count) 576cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 577cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_psradi256((__v8si)a, count); 578cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 579cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 580cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 581cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sra_epi32(__m256i a, __m128i count) 582cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 583cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_psrad256((__v8si)a, (__v4si)count); 584cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 585cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 586cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper#define _mm256_srli_si256(a, count) __extension__ ({ \ 587cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper __m256i __a = (a); \ 588cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (__m256i)__builtin_ia32_psrldqi256(__a, (count)*8); }) 
589cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 590cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 591cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_srli_epi16(__m256i a, int count) 592cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 593cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_psrlwi256((__v16hi)a, count); 594cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 595cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 596cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 597cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_srl_epi16(__m256i a, __m128i count) 598cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 599cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_psrlw256((__v16hi)a, (__v8hi)count); 600cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 601cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 602cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 603cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_srli_epi32(__m256i a, int count) 604cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 605cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_psrldi256((__v8si)a, count); 606cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 607cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 608cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 609cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_srl_epi32(__m256i a, __m128i count) 610cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 611cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_psrld256((__v8si)a, 
(__v4si)count); 612cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 613cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 614cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 615cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_srli_epi64(__m256i a, int count) 616cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 617cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return __builtin_ia32_psrlqi256(a, count); 618cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 619cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 620cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 621cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_srl_epi64(__m256i a, __m128i count) 622cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 623cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return __builtin_ia32_psrlq256(a, count); 624cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 625cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 626cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 627925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_sub_epi8(__m256i a, __m256i b) 628925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 629925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)((__v32qi)a - (__v32qi)b); 630925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 631925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 632925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 633925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_sub_epi16(__m256i a, __m256i b) 634925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 635925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)((__v16hi)a - (__v16hi)b); 
636925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 637925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 638925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 639925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_sub_epi32(__m256i a, __m256i b) 640925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 641925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)((__v8si)a - (__v8si)b); 642925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 643925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 644925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 645925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_sub_epi64(__m256i a, __m256i b) 646925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 647925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return a - b; 648925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 6499c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 6509c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6519c2ffd803af03f1728423d0d73ff87d988642633Craig Topper_mm256_subs_epi8(__m256i a, __m256i b) 6529c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 6539c2ffd803af03f1728423d0d73ff87d988642633Craig Topper return (__m256i)__builtin_ia32_psubsb256((__v32qi)a, (__v32qi)b); 6549c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 6559c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 6569c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6579c2ffd803af03f1728423d0d73ff87d988642633Craig Topper_mm256_subs_epi16(__m256i a, __m256i b) 6589c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 6599c2ffd803af03f1728423d0d73ff87d988642633Craig Topper return (__m256i)__builtin_ia32_psubsw256((__v16hi)a, (__v16hi)b); 
6609c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 6619c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 6629c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6639c2ffd803af03f1728423d0d73ff87d988642633Craig Topper_mm256_subs_epu8(__m256i a, __m256i b) 6649c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 6659c2ffd803af03f1728423d0d73ff87d988642633Craig Topper return (__m256i)__builtin_ia32_psubusb256((__v32qi)a, (__v32qi)b); 6669c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 6679c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 6689c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6699c2ffd803af03f1728423d0d73ff87d988642633Craig Topper_mm256_subs_epu16(__m256i a, __m256i b) 6709c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 6719c2ffd803af03f1728423d0d73ff87d988642633Craig Topper return (__m256i)__builtin_ia32_psubusw256((__v16hi)a, (__v16hi)b); 6729c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 6739c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 674735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6757f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper_mm256_unpackhi_epi8(__m256i a, __m256i b) 6767f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{ 6777f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper return (__m256i)__builtin_shufflevector((__v32qi)a, (__v32qi)b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31); 6787f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper} 6797f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper 6807f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 
6817f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper_mm256_unpackhi_epi16(__m256i a, __m256i b) 6827f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{ 6837f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper return (__m256i)__builtin_shufflevector((__v16hi)a, (__v16hi)b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15); 6847f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper} 6857f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper 6867f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6877f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper_mm256_unpackhi_epi32(__m256i a, __m256i b) 6887f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{ 6897f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper return (__m256i)__builtin_shufflevector((__v8si)a, (__v8si)b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7); 6907f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper} 6917f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper 6927f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6937f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper_mm256_unpackhi_epi64(__m256i a, __m256i b) 6947f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{ 6957f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper return (__m256i)__builtin_shufflevector(a, b, 1, 4+1, 3, 4+3); 6967f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper} 6977f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper 6987f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6997f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper_mm256_unpacklo_epi8(__m256i a, __m256i b) 7007f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{ 7017f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper return (__m256i)__builtin_shufflevector((__v32qi)a, (__v32qi)b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 
32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23); 7027f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper} 7037f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper 7047f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 7057f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper_mm256_unpacklo_epi16(__m256i a, __m256i b) 7067f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{ 7077f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper return (__m256i)__builtin_shufflevector((__v16hi)a, (__v16hi)b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11); 7087f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper} 7097f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper 7107f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 7117f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper_mm256_unpacklo_epi32(__m256i a, __m256i b) 7127f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{ 7137f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper return (__m256i)__builtin_shufflevector((__v8si)a, (__v8si)b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5); 7147f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper} 7157f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper 7167f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 7177f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper_mm256_unpacklo_epi64(__m256i a, __m256i b) 7187f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{ 7197f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper return (__m256i)__builtin_shufflevector(a, b, 0, 4+0, 2, 4+2); 7207f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper} 7217f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper 7227f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, 
__nodebug__)) 723735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper_mm256_xor_si256(__m256i a, __m256i b) 724735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper{ 725735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper return a ^ b; 726735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper} 727ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper 728ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 729ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper_mm256_stream_load_si256(__m256i *__V) 730ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper{ 731ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper return (__m256i)__builtin_ia32_movntdqa256((__v4di *)__V); 732ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper} 733ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper 734ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topperstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 735ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper_mm_broadcastss_ps(__m128 __X) 736ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper{ 737ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper return (__m128)__builtin_ia32_vbroadcastss_ps((__v4sf)__X); 738ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper} 739ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper 740ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topperstatic __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 741ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper_mm256_broadcastss_ps(__m128 __X) 742ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper{ 743ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper return (__m256)__builtin_ia32_vbroadcastss_ps256((__v4sf)__X); 744ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper} 745ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper 746ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topperstatic __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) 
747ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper_mm256_broadcastsd_pd(__m128d __X) 748ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper{ 749ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper return (__m256d)__builtin_ia32_vbroadcastsd_pd256((__v2df)__X); 750ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper} 751ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper 752ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 753ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper_mm_broadcastsi128_si256(__m128i const *a) 754ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper{ 755ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper return (__m256i)__builtin_ia32_vbroadcastsi256(a); 756ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper} 757ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper 758ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper#define _mm_blend_epi32(V1, V2, M) __extension__ ({ \ 759ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper __m128i __V1 = (V1); \ 760ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper __m128i __V2 = (V2); \ 76134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper (__m128i)__builtin_ia32_pblendd128((__v4si)__V1, (__v4si)__V2, (M)); }) 762ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper 763ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper#define _mm256_blend_epi32(V1, V2, M) __extension__ ({ \ 764ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper __m256i __V1 = (V1); \ 765ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper __m256i __V2 = (V2); \ 76634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper (__m256i)__builtin_ia32_pblendd256((__v8si)__V1, (__v8si)__V2, (M)); }) 76734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 76834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 76934a1da4354959522cd1721ce9ca099cc5c743f01Craig 
Topper_mm256_broadcastb_epi8(__m128i __X) 77034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 77134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper return (__m256i)__builtin_ia32_pbroadcastb256((__v16qi)__X); 77234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 77334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 77434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 77534a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm256_broadcastw_epi16(__m128i __X) 77634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 77734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper return (__m256i)__builtin_ia32_pbroadcastw256((__v8hi)__X); 77834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 77934a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 78034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 78134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm256_broadcastd_epi32(__m128i __X) 78234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 78334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper return (__m256i)__builtin_ia32_pbroadcastd256((__v4si)__X); 78434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 78534a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 78634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 78734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm256_broadcastq_epi64(__m128i __X) 78834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 78934a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper return (__m256i)__builtin_ia32_pbroadcastq256(__X); 79034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 79134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 79234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 
79334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm_broadcastb_epi8(__m128i __X) 79434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 79534a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper return (__m128i)__builtin_ia32_pbroadcastb128((__v16qi)__X); 79634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 79734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 79834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 79934a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm_broadcastw_epi16(__m128i __X) 80034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 80134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper return (__m128i)__builtin_ia32_pbroadcastw128((__v8hi)__X); 80234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 80334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 80434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 80534a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 80634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm_broadcastd_epi32(__m128i __X) 80734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 80834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper return (__m128i)__builtin_ia32_pbroadcastd128((__v4si)__X); 80934a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 81034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 81134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 81234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm_broadcastq_epi64(__m128i __X) 81334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 81434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper return (__m128i)__builtin_ia32_pbroadcastq128(__X); 81534a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 81634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 81734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic 
__inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 81834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm256_permutevar8x32_epi32(__m256i a, __m256i b) 81934a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 82034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper return (__m256i)__builtin_ia32_permvarsi256((__v8si)a, (__v8si)b); 82134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 82234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 82334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper#define _mm256_permute4x64_pd(V, M) __extension__ ({ \ 82434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper __m256d __V = (V); \ 825b5491f3d7b688b37745397fafd6c1f10548fd5c2Craig Topper (__m256d)__builtin_shufflevector((__v4df)__V, (__v4df) _mm256_setzero_pd(), \ 826b5491f3d7b688b37745397fafd6c1f10548fd5c2Craig Topper (M) & 0x3, ((M) & 0xc) >> 2, \ 827b5491f3d7b688b37745397fafd6c1f10548fd5c2Craig Topper ((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); }) 82834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 82934a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 83034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm256_permutevar8x32_ps(__m256 a, __m256 b) 83134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{ 83234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper return (__m256)__builtin_ia32_permvarsf256((__v8sf)a, (__v8sf)b); 83334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper} 83434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 83534a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper#define _mm256_permute4x64_epi64(V, M) __extension__ ({ \ 83634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper __m256i __V = (V); \ 837b5491f3d7b688b37745397fafd6c1f10548fd5c2Craig Topper (__m256i)__builtin_shufflevector((__v4di)__V, (__v4di) _mm256_setzero_si256(), \ 838b5491f3d7b688b37745397fafd6c1f10548fd5c2Craig Topper (M) & 0x3, ((M) & 0xc) >> 2, \ 
839b5491f3d7b688b37745397fafd6c1f10548fd5c2Craig Topper ((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); }) 84034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 84134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper#define _mm256_permute2x128_si256(V1, V2, M) __extension__ ({ \ 84234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper __m256i __V1 = (V1); \ 84334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper __m256i __V2 = (V2); \ 84449a110db4c43835681bb89671f8f73c8d8c7c28cCraig Topper (__m256i)__builtin_ia32_permti256(__V1, __V2, (M)); }) 8455cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 8465cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper#define _mm256_extracti128_si256(A, O) __extension__ ({ \ 8475cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper __m256i __A = (A); \ 8485cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper (__m128i)__builtin_ia32_extract128i256(__A, (O)); }) 8495cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 8505cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper#define _mm256_inserti128_si256(V1, V2, O) __extension__ ({ \ 8515cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper __m256i __V1 = (V1); \ 8525cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper __m128i __V2 = (V2); \ 8535cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper (__m256i)__builtin_ia32_insert128i256(__V1, __V2, (O)); }) 8545cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 8555cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 8565cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_maskload_epi32(int const *__X, __m256i __M) 8575cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 8585cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m256i)__builtin_ia32_maskloadd256((const __v8si *)__X, (__v8si)__M); 8595cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 8605cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 8615cbd751a2f9d73248c5336140d73680fcd4669a3Craig 
Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 8625cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_maskload_epi64(long long const *__X, __m256i __M) 8635cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 8645cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, __M); 8655cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 8665cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 8675cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 8685cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_maskload_epi32(int const *__X, __m128i __M) 8695cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 8705cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m128i)__builtin_ia32_maskloadd((const __v4si *)__X, (__v4si)__M); 8715cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 8725cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 8735cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 8745cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_maskload_epi64(long long const *__X, __m128i __M) 8755cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 8765cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m128i)__builtin_ia32_maskloadq((const __v2di *)__X, (__v2di)__M); 8775cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 8785cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 8795cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 8805cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y) 8815cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 8825cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper __builtin_ia32_maskstored256((__v8si *)__X, (__v8si)__M, (__v8si)__Y); 
8835cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 8845cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 8855cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 8865cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y) 8875cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 8885cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper __builtin_ia32_maskstoreq256((__v4di *)__X, __M, __Y); 8895cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 8905cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 8915cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 8925cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_maskstore_epi32(int *__X, __m128i __M, __m128i __Y) 8935cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 8945cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper __builtin_ia32_maskstored((__v4si *)__X, (__v4si)__M, (__v4si)__Y); 8955cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 8965cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 8975cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 8985cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y) 8995cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9005cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper __builtin_ia32_maskstoreq(( __v2di *)__X, __M, __Y); 9015cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9025cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9035cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 9045cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_sllv_epi32(__m256i __X, __m256i __Y) 9055cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 
9065cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y); 9075cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9085cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9095cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 9105cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_sllv_epi32(__m128i __X, __m128i __Y) 9115cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9125cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y); 9135cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9145cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9155cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 9165cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_sllv_epi64(__m256i __X, __m256i __Y) 9175cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9185cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m256i)__builtin_ia32_psllv4di(__X, __Y); 9195cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9205cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9215cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 9225cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_sllv_epi64(__m128i __X, __m128i __Y) 9235cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9245cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m128i)__builtin_ia32_psllv2di(__X, __Y); 9255cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9265cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9275cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 9285cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_srav_epi32(__m256i __X, __m256i __Y) 
9295cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9305cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y); 9315cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9325cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9335cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 9345cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_srav_epi32(__m128i __X, __m128i __Y) 9355cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9365cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y); 9375cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9385cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9395cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 9405cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_srlv_epi32(__m256i __X, __m256i __Y) 9415cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9425cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y); 9435cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9445cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9455cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 9465cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_srlv_epi32(__m128i __X, __m128i __Y) 9475cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9485cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y); 9495cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9505cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9515cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 
9525cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_srlv_epi64(__m256i __X, __m256i __Y) 9535cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9545cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m256i)__builtin_ia32_psrlv4di(__X, __Y); 9555cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9565cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper 9575cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 9585cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_srlv_epi64(__m128i __X, __m128i __Y) 9595cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{ 9605cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper return (__m128i)__builtin_ia32_psrlv2di(__X, __Y); 9615cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper} 9625283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren 9635283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren#define _mm_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \ 9645283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128d __a = (a); \ 9655283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren double const *__m = (m); \ 9665283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128i __i = (i); \ 9675283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128d __mask = (mask); \ 9685283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__m128d)__builtin_ia32_gatherd_pd((__v2df)__a, (const __v2df *)__m, \ 9695283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__v4si)__i, (__v2df)__mask, (s)); }) 9705283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren 9715283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren#define _mm256_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \ 9725283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m256d __a = (a); \ 9735283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren double const *__m = (m); \ 974c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren __m128i __i = (i); \ 9755283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m256d 
__mask = (mask); \ 9765283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__m256d)__builtin_ia32_gatherd_pd256((__v4df)__a, (const __v4df *)__m, \ 977c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren (__v4si)__i, (__v4df)__mask, (s)); }) 9785283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren 9795283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren#define _mm_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \ 9805283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128d __a = (a); \ 9815283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren double const *__m = (m); \ 9825283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128i __i = (i); \ 9835283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128d __mask = (mask); \ 9845283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__m128d)__builtin_ia32_gatherq_pd((__v2df)__a, (const __v2df *)__m, \ 9855283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__v2di)__i, (__v2df)__mask, (s)); }) 9865283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren 9875283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren#define _mm256_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \ 9885283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m256d __a = (a); \ 9895283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren double const *__m = (m); \ 9905283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m256i __i = (i); \ 9915283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m256d __mask = (mask); \ 9925283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__m256d)__builtin_ia32_gatherq_pd256((__v4df)__a, (const __v4df *)__m, \ 9935283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__v4di)__i, (__v4df)__mask, (s)); }) 9945283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren 9955283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren#define _mm_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \ 9965283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128 __a = (a); \ 9975283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren float const *__m = (m); \ 
9985283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128i __i = (i); \ 9995283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128 __mask = (mask); \ 10005283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__m128)__builtin_ia32_gatherd_ps((__v4sf)__a, (const __v4sf *)__m, \ 10015283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__v4si)__i, (__v4sf)__mask, (s)); }) 10025283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren 10035283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren#define _mm256_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \ 10045283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m256 __a = (a); \ 10055283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren float const *__m = (m); \ 10065283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m256i __i = (i); \ 10075283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m256 __mask = (mask); \ 10085283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__m256)__builtin_ia32_gatherd_ps256((__v8sf)__a, (const __v8sf *)__m, \ 10095283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__v8si)__i, (__v8sf)__mask, (s)); }) 10105283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren 10115283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren#define _mm_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \ 10125283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128 __a = (a); \ 10135283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren float const *__m = (m); \ 10145283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128i __i = (i); \ 10155283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m128 __mask = (mask); \ 10165283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__m128)__builtin_ia32_gatherq_ps((__v4sf)__a, (const __v4sf *)__m, \ 10175283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren (__v2di)__i, (__v4sf)__mask, (s)); }) 10185283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren 10195283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren#define _mm256_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \ 
1020c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren __m128 __a = (a); \ 10215283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren float const *__m = (m); \ 10225283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren __m256i __i = (i); \ 1023c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren __m128 __mask = (mask); \ 1024c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren (__m128)__builtin_ia32_gatherq_ps256((__v4sf)__a, (const __v4sf *)__m, \ 1025c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren (__v4di)__i, (__v4sf)__mask, (s)); }) 1026c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren 1027c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren#define _mm_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \ 1028c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren __m128i __a = (a); \ 1029c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren int const *__m = (m); \ 1030c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren __m128i __i = (i); \ 1031c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren __m128i __mask = (mask); \ 1032c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren (__m128i)__builtin_ia32_gatherd_d((__v4si)__a, (const __v4si *)__m, \ 1033c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren (__v4si)__i, (__v4si)__mask, (s)); }) 1034c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren 1035c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \ 1036c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren __m256i __a = (a); \ 1037c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren int const *__m = (m); \ 1038c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren __m256i __i = (i); \ 1039c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren __m256i __mask = (mask); \ 1040c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren (__m256i)__builtin_ia32_gatherd_d256((__v8si)__a, (const __v8si *)__m, \ 1041c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren (__v8si)__i, (__v8si)__mask, (s)); }) 

/* Masked gathers of 32-bit integers with 64-bit indices.  Arguments, in
 * order: a = source vector, m = base pointer, i = index vector, mask = mask
 * vector, s = final builtin operand (presumably the scale -- confirm against
 * the builtin).  a, m, i and mask are evaluated once into typed temporaries.
 * The 256-bit-index variant still works on 128-bit source, mask and result. */
#define _mm_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \
  __m128i __a = (a); \
  int const *__m = (m); \
  __m128i __i = (i); \
  __m128i __mask = (mask); \
  (__m128i)__builtin_ia32_gatherq_d((__v4si)__a, (const __v4si *)__m, \
                                    (__v2di)__i, (__v4si)__mask, (s)); })

#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \
  __m128i __a = (a); \
  int const *__m = (m); \
  __m256i __i = (i); \
  __m128i __mask = (mask); \
  (__m128i)__builtin_ia32_gatherq_d256((__v4si)__a, (const __v4si *)__m, \
                                       (__v4di)__i, (__v4si)__mask, (s)); })

/* Masked gathers of 64-bit integers.  The base pointer addresses 64-bit
 * elements, so the temporary is `long long const *`.  It was previously
 * `int const *`, which made a correctly typed argument draw an
 * incompatible-pointer diagnostic.  The explicit cast keeps callers that
 * passed `int const *` to satisfy the old declaration compiling. */
#define _mm_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \
  __m128i __a = (a); \
  long long const *__m = (long long const *)(m); \
  __m128i __i = (i); \
  __m128i __mask = (mask); \
  (__m128i)__builtin_ia32_gatherd_q((__v2di)__a, (const __v2di *)__m, \
                                    (__v4si)__i, (__v2di)__mask, (s)); })

#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \
  __m256i __a = (a); \
  long long const *__m = (long long const *)(m); \
  __m128i __i = (i); \
  __m256i __mask = (mask); \
  (__m256i)__builtin_ia32_gatherd_q256((__v4di)__a, (const __v4di *)__m, \
                                       (__v4si)__i, (__v4di)__mask, (s)); })

#define _mm_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \
  __m128i __a = (a); \
  long long const *__m = (long long const *)(m); \
  __m128i __i = (i); \
  __m128i __mask = (mask); \
  (__m128i)__builtin_ia32_gatherq_q((__v2di)__a, (const __v2di *)__m, \
                                    (__v2di)__i, (__v2di)__mask, (s)); })

#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \
  __m256i __a = (a); \
  long long const *__m = (long long const *)(m); \
  __m256i __i = (i); \
  __m256i __mask = (mask); \
  (__m256i)__builtin_ia32_gatherq_q256((__v4di)__a, (const __v4di *)__m, \
                                       (__v4di)__i, (__v4di)__mask, (s)); })

/* Unmasked gathers.  Arguments: m = base pointer, i = index vector, s = final
 * builtin operand.  They call the same builtins as the masked forms, with a
 * zero vector as the source operand and a mask in which every element is -1.
 * For the floating-point variants that mask element is the value -1.0, not
 * an all-ones bit pattern.
 * NOTE(review): this relies on the gather instructions consulting only the
 * sign bit of each mask element -- confirm against the instruction
 * reference. */
#define _mm_i32gather_pd(m, i, s) __extension__ ({ \
  double const *__m = (m); \
  __m128i __i = (i); \
  (__m128d)__builtin_ia32_gatherd_pd((__v2df)_mm_setzero_pd(), \
                                     (const __v2df *)__m, (__v4si)__i, \
                                     (__v2df)_mm_set1_pd((double)(long long int)-1), (s)); })

#define _mm256_i32gather_pd(m, i, s) __extension__ ({ \
  double const *__m = (m); \
  __m128i __i = (i); \
  (__m256d)__builtin_ia32_gatherd_pd256((__v4df)_mm256_setzero_pd(), \
                                        (const __v4df *)__m, (__v4si)__i, \
                                        (__v4df)_mm256_set1_pd((double)(long long int)-1), (s)); })

#define _mm_i64gather_pd(m, i, s) __extension__ ({ \
  double const *__m = (m); \
  __m128i __i = (i); \
  (__m128d)__builtin_ia32_gatherq_pd((__v2df)_mm_setzero_pd(), \
                                     (const __v2df *)__m, (__v2di)__i, \
                                     (__v2df)_mm_set1_pd((double)(long long int)-1), (s)); })

#define _mm256_i64gather_pd(m, i, s) __extension__ ({ \
  double const *__m = (m); \
  __m256i __i = (i); \
  (__m256d)__builtin_ia32_gatherq_pd256((__v4df)_mm256_setzero_pd(), \
                                        (const __v4df *)__m, (__v4di)__i, \
                                        (__v4df)_mm256_set1_pd((double)(long long int)-1), (s)); })

#define _mm_i32gather_ps(m, i, s) __extension__ ({ \
  float const *__m = (m); \
  __m128i __i = (i); \
  (__m128)__builtin_ia32_gatherd_ps((__v4sf)_mm_setzero_ps(), \
                                    (const __v4sf *)__m, (__v4si)__i, \
                                    (__v4sf)_mm_set1_ps((float)(int)-1), (s)); })

#define _mm256_i32gather_ps(m, i, s) __extension__ ({ \
  float const *__m = (m); \
  __m256i __i = (i); \
  (__m256)__builtin_ia32_gatherd_ps256((__v8sf)_mm256_setzero_ps(), \
                                       (const __v8sf *)__m, (__v8si)__i, \
                                       (__v8sf)_mm256_set1_ps((float)(int)-1), (s)); })

#define _mm_i64gather_ps(m, i, s) __extension__ ({ \
  float const *__m = (m); \
  __m128i __i = (i); \
  (__m128)__builtin_ia32_gatherq_ps((__v4sf)_mm_setzero_ps(), \
                                    (const __v4sf *)__m, (__v2di)__i, \
                                    (__v4sf)_mm_set1_ps((float)(int)-1), (s)); })

#define _mm256_i64gather_ps(m, i, s) __extension__ ({ \
  float const *__m = (m); \
  __m256i __i = (i); \
  (__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_setzero_ps(), \
                                       (const __v4sf *)__m, (__v4di)__i, \
                                       (__v4sf)_mm_set1_ps((float)(int)-1), (s)); })

#define _mm_i32gather_epi32(m, i, s) __extension__ ({ \
  int const *__m = (m); \
  __m128i __i = (i); \
  (__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_setzero_si128(), \
                                    (const __v4si *)__m, (__v4si)__i, \
                                    (__v4si)_mm_set1_epi32(-1), (s)); })

#define _mm256_i32gather_epi32(m, i, s) __extension__ ({ \
  int const *__m = (m); \
  __m256i __i = (i); \
  (__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_setzero_si256(), \
                                       (const __v8si *)__m, (__v8si)__i, \
                                       (__v8si)_mm256_set1_epi32(-1), (s)); })

#define _mm_i64gather_epi32(m, i, s) __extension__ ({ \
  int const *__m = (m); \
  __m128i __i = (i); \
  (__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_setzero_si128(), \
                                    (const __v4si *)__m, (__v2di)__i, \
                                    (__v4si)_mm_set1_epi32(-1), (s)); })

#define _mm256_i64gather_epi32(m, i, s) __extension__ ({ \
  int const *__m = (m); \
  __m256i __i = (i); \
  (__m128i)__builtin_ia32_gatherq_d256((__v4si)_mm_setzero_si128(), \
                                       (const __v4si *)__m, (__v4di)__i, \
                                       (__v4si)_mm_set1_epi32(-1), (s)); })
/* Gathers of 64-bit integer elements.
 *
 * Each macro below copies its base pointer (m) and index vector (i) into
 * locals exactly once, then forwards them to the matching gather builtin
 * together with:
 *   - an all-zero vector as the first (source) operand,
 *   - a vector whose 64-bit lanes are all -1 as the fourth operand
 *     (presumably the mask that selects every element -- confirm against
 *     the builtin's definition),
 *   - the scale (s), passed through unchanged as the last operand.
 *
 * The base pointer is declared as 'long long const *' because the elements
 * being gathered are 64 bits wide; declaring it as 'int const *' would make
 * callers that pass a pointer to 64-bit data trigger an incompatible pointer
 * type diagnostic.
 */

/* 32-bit indices taken from a __m128i (viewed as __v4si); 128-bit result. */
#define _mm_i32gather_epi64(m, i, s) __extension__ ({ \
  long long const *__m = (m); \
  __m128i __i = (i); \
  (__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_setzero_si128(), \
             (const __v2di *)__m, (__v4si)__i, \
             (__v2di)_mm_set1_epi64x(-1), (s)); })

/* 32-bit indices taken from a __m128i (viewed as __v4si); 256-bit result. */
#define _mm256_i32gather_epi64(m, i, s) __extension__ ({ \
  long long const *__m = (m); \
  __m128i __i = (i); \
  (__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_setzero_si256(), \
             (const __v4di *)__m, (__v4si)__i, \
             (__v4di)_mm256_set1_epi64x(-1), (s)); })

/* 64-bit indices taken from a __m128i (viewed as __v2di); 128-bit result. */
#define _mm_i64gather_epi64(m, i, s) __extension__ ({ \
  long long const *__m = (m); \
  __m128i __i = (i); \
  (__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_setzero_si128(), \
             (const __v2di *)__m, (__v2di)__i, \
             (__v2di)_mm_set1_epi64x(-1), (s)); })

/* 64-bit indices taken from a __m256i (viewed as __v4di); 256-bit result. */
#define _mm256_i64gather_epi64(m, i, s) __extension__ ({ \
  long long const *__m = (m); \
  __m256i __i = (i); \
  (__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_setzero_si256(), \
             (const __v4di *)__m, (__v4di)__i, \
             (__v4di)_mm256_set1_epi64x(-1), (s)); })