avx2intrin.h revision cbe627b54eaeeeac7a28725de6c9b60b4d3ab32d
1925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper/*===---- avx2intrin.h - AVX2 intrinsics -----------------------------------=== 2925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * 3925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * Permission is hereby granted, free of charge, to any person obtaining a copy 4925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * of this software and associated documentation files (the "Software"), to deal 5925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * in the Software without restriction, including without limitation the rights 6925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * copies of the Software, and to permit persons to whom the Software is 8925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * furnished to do so, subject to the following conditions: 9925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * 10925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * The above copyright notice and this permission notice shall be included in 11925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * all copies or substantial portions of the Software. 12925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * 13925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * THE SOFTWARE. 20925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * 21925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper *===-----------------------------------------------------------------------=== 22925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper */ 23925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 24925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper#ifndef __IMMINTRIN_H 25925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper#error "Never use <avx2intrin.h> directly; include <immintrin.h> instead." 26925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper#endif 27925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 28925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper/* SSE4 Multiple Packed Sums of Absolute Difference. */ 29925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper#define _mm256_mpsadbw_epu8(X, Y, M) __builtin_ia32_mpsadbw256((X), (Y), (M)) 30925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 31925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 32925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_abs_epi8(__m256i a) 33925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 34925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)__builtin_ia32_pabsb256((__v32qi)a); 35925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 36925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 37925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 38925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_abs_epi16(__m256i a) 39925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 40925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)__builtin_ia32_pabsw256((__v16hi)a); 41925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 42925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 43925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 44925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_abs_epi32(__m256i a) 45925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 46925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)__builtin_ia32_pabsd256((__v8si)a); 47925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 48925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 49925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 50925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_packs_epi16(__m256i a, __m256i b) 51925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 52925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)__builtin_ia32_packsswb256((__v16hi)a, (__v16hi)b); 53925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 54925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 55925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 56925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_packs_epi32(__m256i a, __m256i b) 57925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 58925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)__builtin_ia32_packssdw256((__v8si)a, (__v8si)b); 59925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 60925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 61925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 62925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_packus_epi16(__m256i a, __m256i b) 63925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 64925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)__builtin_ia32_packuswb256((__v16hi)a, (__v16hi)b); 65925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 66925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 67925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 68925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_packus_epi32(__m256i __V1, __m256i __V2) 69925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 70925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2); 71925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 72925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 73925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 74925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_add_epi8(__m256i a, __m256i b) 75925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 76925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)((__v32qi)a + (__v32qi)b); 77925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 78925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 79925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 80925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_add_epi16(__m256i a, __m256i b) 81925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 82925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)((__v16hi)a + (__v16hi)b); 83925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 84925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 85925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 86925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_add_epi32(__m256i a, __m256i b) 87925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 88925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)((__v8si)a + (__v8si)b); 89925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 90925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 91925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 92925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_add_epi64(__m256i a, __m256i b) 93925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 94925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return a + b; 95925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 96925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 97925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 989c2ffd803af03f1728423d0d73ff87d988642633Craig Topper_mm256_adds_epi8(__m256i a, __m256i b) 999c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 1009c2ffd803af03f1728423d0d73ff87d988642633Craig Topper return (__m256i)__builtin_ia32_paddsb256((__v32qi)a, (__v32qi)b); 1019c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 1029c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 1039c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1049c2ffd803af03f1728423d0d73ff87d988642633Craig Topper_mm256_adds_epi16(__m256i a, __m256i b) 1059c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 1069c2ffd803af03f1728423d0d73ff87d988642633Craig Topper return (__m256i)__builtin_ia32_paddsw256((__v16hi)a, (__v16hi)b); 1079c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 1089c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 1099c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1109c2ffd803af03f1728423d0d73ff87d988642633Craig Topper_mm256_adds_epu8(__m256i a, __m256i b) 1119c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 1129c2ffd803af03f1728423d0d73ff87d988642633Craig Topper return (__m256i)__builtin_ia32_paddusb256((__v32qi)a, (__v32qi)b); 1139c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 1149c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 1159c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1169c2ffd803af03f1728423d0d73ff87d988642633Craig Topper_mm256_adds_epu16(__m256i a, __m256i b) 1179c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 1189c2ffd803af03f1728423d0d73ff87d988642633Craig Topper return (__m256i)__builtin_ia32_paddusw256((__v16hi)a, (__v16hi)b); 1199c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 1209c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 1219c2ffd803af03f1728423d0d73ff87d988642633Craig Topper#define _mm256_alignr_epi8(a, b, n) __extension__ ({ \ 1229c2ffd803af03f1728423d0d73ff87d988642633Craig Topper __m256i __a = (a); \ 1239c2ffd803af03f1728423d0d73ff87d988642633Craig Topper __m256i __b = (b); \ 1249c2ffd803af03f1728423d0d73ff87d988642633Craig Topper (__m256i)__builtin_ia32_palignr256((__v32qi)__a, (__v32qi)__b, (n)); }) 1259c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 1269c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 127735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper_mm256_and_si256(__m256i a, __m256i b) 128735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper{ 129735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper return a & b; 130735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper} 131735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper 132735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 133735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper_mm256_andnot_si256(__m256i a, __m256i b) 134735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper{ 135735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper return ~a & b; 136735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper} 137735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper 138735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1394c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_avg_epu8(__m256i a, __m256i b) 1404c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1414c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper return (__m256i)__builtin_ia32_pavgb256((__v32qi)a, (__v32qi)b); 1424c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1434c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1444c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1454c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_avg_epu16(__m256i a, __m256i b) 1464c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1474c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper return (__m256i)__builtin_ia32_pavgw256((__v16hi)a, (__v16hi)b); 1484c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1494c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1504c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1514c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M) 1524c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1534c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2, 1544c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper (__v32qi)__M); 1554c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1564c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1574c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper#define _mm256_blend_epi16(V1, V2, M) __extension__ ({ \ 1584c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper __m256i __V1 = (V1); \ 1594c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper __m256i __V2 = (V2); \ 1604c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper (__m256i)__builtin_ia32_pblendw256((__v16hi)__V1, (__v16hi)__V2, M); }) 1614c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1624c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1634c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_cmpeq_epi8(__m256i a, __m256i b) 1644c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1654c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper return (__m256i)((__v32qi)a == (__v32qi)b); 1664c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1674c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1684c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1694c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_cmpeq_epi16(__m256i a, __m256i b) 1704c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1714c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper return (__m256i)((__v16hi)a == (__v16hi)b); 1724c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1734c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1744c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1754c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_cmpeq_epi32(__m256i a, __m256i b) 1764c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1774c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper return (__m256i)((__v8si)a == (__v8si)b); 1784c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1794c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1804c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1814c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_cmpeq_epi64(__m256i a, __m256i b) 1824c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1834c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper return (__m256i)((__v4di)a == (__v4di)b); 1844c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1854c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1864c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1874c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_cmpgt_epi8(__m256i a, __m256i b) 1884c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1894c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper return (__m256i)((__v32qi)a > (__v32qi)b); 1904c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1914c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1924c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1934c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_cmpgt_epi16(__m256i a, __m256i b) 1944c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 1954c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper return (__m256i)((__v16hi)a > (__v16hi)b); 1964c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 1974c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 1984c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 1994c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_cmpgt_epi32(__m256i a, __m256i b) 2004c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 2014c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper return (__m256i)((__v8si)a > (__v8si)b); 2024c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 2034c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 2044c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2054c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_cmpgt_epi64(__m256i a, __m256i b) 2064c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{ 2074c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper return (__m256i)((__v4di)a > (__v4di)b); 2084c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper} 2094c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper 2104c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 211318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper_mm256_hadd_epi16(__m256i a, __m256i b) 212318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{ 213318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper return (__m256i)__builtin_ia32_phaddw256((__v16hi)a, (__v16hi)b); 214318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper} 215318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper 216318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 217318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper_mm256_hadd_epi32(__m256i a, __m256i b) 218318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{ 219318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper return (__m256i)__builtin_ia32_phaddd256((__v8si)a, (__v8si)b); 220318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper} 221318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper 222318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 223318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper_mm256_hadds_epi16(__m256i a, __m256i b) 224318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{ 225318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper return (__m256i)__builtin_ia32_phaddsw256((__v16hi)a, (__v16hi)b); 226318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper} 227318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper 228318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 229318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper_mm256_hsub_epi16(__m256i a, __m256i b) 230318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{ 231318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper return (__m256i)__builtin_ia32_phsubw256((__v16hi)a, (__v16hi)b); 232318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper} 233318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper 234318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 235318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper_mm256_hsub_epi32(__m256i a, __m256i b) 236318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{ 237318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper return (__m256i)__builtin_ia32_phsubd256((__v8si)a, (__v8si)b); 238318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper} 239318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper 240318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 241318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper_mm256_hsubs_epi16(__m256i a, __m256i b) 242318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{ 243318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper return (__m256i)__builtin_ia32_phsubsw256((__v16hi)a, (__v16hi)b); 244318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper} 245318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper 246318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2474a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper_mm256_maddubs_epi16(__m256i a, __m256i b) 2484a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper{ 2494a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)a, (__v32qi)b); 2504a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper} 2514a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper 2524a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 2534a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper_mm256_madd_epi16(__m256i a, __m256i b) 2544a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper{ 2554a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)a, (__v16hi)b); 2564a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper} 2574a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper 2584a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 259231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_max_epi8(__m256i a, __m256i b) 260231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 261231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)a, (__v32qi)b); 262231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 263231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 264231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 265231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_max_epi16(__m256i a, __m256i b) 266231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 267231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)a, (__v16hi)b); 268231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 269231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 270231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 271231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_max_epi32(__m256i a, __m256i b) 272231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 273231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmaxsd256((__v8si)a, (__v8si)b); 274231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 275231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 276231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 277231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_max_epu8(__m256i a, __m256i b) 278231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 279231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmaxub256((__v32qi)a, (__v32qi)b); 280231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 281231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 282231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 283231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_max_epu16(__m256i a, __m256i b) 284231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 285231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)a, (__v16hi)b); 286231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 287231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 288231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 289231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_max_epu32(__m256i a, __m256i b) 290231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 291231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmaxud256((__v8si)a, (__v8si)b); 292231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 293231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 294231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 295231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_min_epi8(__m256i a, __m256i b) 296231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 297231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pminsb256((__v32qi)a, (__v32qi)b); 298231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 299231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 300231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 301231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_min_epi16(__m256i a, __m256i b) 302231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 303231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pminsw256((__v16hi)a, (__v16hi)b); 304231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 305231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 306231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 307231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_min_epi32(__m256i a, __m256i b) 308231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 309231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pminsd256((__v8si)a, (__v8si)b); 310231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 311231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 312231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 313231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_min_epu8(__m256i a, __m256i b) 314231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 315231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pminub256((__v32qi)a, (__v32qi)b); 316231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 317231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 318231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 319231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_min_epu16(__m256i a, __m256i b) 320231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 321231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)a, (__v16hi)b); 322231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 323231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 324231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 325231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_min_epu32(__m256i a, __m256i b) 326231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 327231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pminud256((__v8si)a, (__v8si)b); 328231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 329231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 330231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 331231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_movemask_epi8(__m256i a) 332231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 333231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return __builtin_ia32_pmovmskb256((__v32qi)a); 334231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 335231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 336231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 337231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi8_epi16(__m128i __V) 338231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 339231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovsxbw256((__v16qi)__V); 340231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 341231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 342231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 343231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi8_epi32(__m128i __V) 344231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 345231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovsxbd256((__v16qi)__V); 346231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 347231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 348231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 349231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi8_epi64(__m128i __V) 350231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 351231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovsxbq256((__v16qi)__V); 352231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 353231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 354231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 355231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi16_epi32(__m128i __V) 356231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 357231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovsxwd256((__v8hi)__V); 358231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 359231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 360231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 361231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi16_epi64(__m128i __V) 362231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 363231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovsxwq256((__v8hi)__V); 364231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 365231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 366231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 367231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi32_epi64(__m128i __V) 368231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 369231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovsxdq256((__v4si)__V); 370231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 371231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 372231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 373231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu8_epi16(__m128i __V) 374231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 375231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovzxbw256((__v16qi)__V); 376231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 377231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 378231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 379231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu8_epi32(__m128i __V) 380231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 381231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovzxbd256((__v16qi)__V); 382231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 383231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 384231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 385231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu8_epi64(__m128i __V) 386231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 387231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovzxbq256((__v16qi)__V); 388231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 389231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 390231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 391231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu16_epi32(__m128i __V) 392231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 393231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovzxwd256((__v8hi)__V); 394231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 395231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 396231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 397231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu16_epi64(__m128i __V) 398231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 399231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovzxwq256((__v8hi)__V); 400231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 401231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 402231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 403231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu32_epi64(__m128i __V) 404231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{ 405231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper return (__m256i)__builtin_ia32_pmovzxdq256((__v4si)__V); 406231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper} 407231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper 40828a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 40928a324a30b0677309a4c5d73ef5197398265e129Craig Topper_mm256_mul_epi32(__m256i a, __m256i b) 41028a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 41128a324a30b0677309a4c5d73ef5197398265e129Craig Topper return (__m256i)__builtin_ia32_pmuldq256((__v8si)a, (__v8si)b); 41228a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 41328a324a30b0677309a4c5d73ef5197398265e129Craig Topper 41428a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 41528a324a30b0677309a4c5d73ef5197398265e129Craig Topper_mm256_mulhrs_epi16(__m256i a, __m256i b) 41628a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 41728a324a30b0677309a4c5d73ef5197398265e129Craig Topper return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)a, (__v16hi)b); 41828a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 41928a324a30b0677309a4c5d73ef5197398265e129Craig Topper 42028a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 42128a324a30b0677309a4c5d73ef5197398265e129Craig Topper_mm256_mulhi_epu16(__m256i a, __m256i b) 42228a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 42328a324a30b0677309a4c5d73ef5197398265e129Craig Topper return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)a, (__v16hi)b); 42428a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 42528a324a30b0677309a4c5d73ef5197398265e129Craig Topper 42628a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 42728a324a30b0677309a4c5d73ef5197398265e129Craig Topper_mm256_mulhi_epi16(__m256i a, __m256i b) 42828a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 42928a324a30b0677309a4c5d73ef5197398265e129Craig Topper return (__m256i)__builtin_ia32_pmulhw256((__v16hi)a, (__v16hi)b); 43028a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 43128a324a30b0677309a4c5d73ef5197398265e129Craig Topper 43228a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 43328a324a30b0677309a4c5d73ef5197398265e129Craig Topper_mm256_mullo_epi16(__m256i a, __m256i b) 43428a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 43528a324a30b0677309a4c5d73ef5197398265e129Craig Topper return (__m256i)((__v16hi)a * (__v16hi)b); 43628a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 43728a324a30b0677309a4c5d73ef5197398265e129Craig Topper 43828a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 43928a324a30b0677309a4c5d73ef5197398265e129Craig Topper_mm256_mullo_epi32 (__m256i a, __m256i b) 44028a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 44128a324a30b0677309a4c5d73ef5197398265e129Craig Topper return (__m256i)((__v8si)a * (__v8si)b); 44228a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 44328a324a30b0677309a4c5d73ef5197398265e129Craig Topper 44428a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 44528a324a30b0677309a4c5d73ef5197398265e129Craig Topper_mm256_mul_epu32(__m256i a, __m256i b) 44628a324a30b0677309a4c5d73ef5197398265e129Craig Topper{ 44728a324a30b0677309a4c5d73ef5197398265e129Craig Topper return __builtin_ia32_pmuludq256((__v8si)a, (__v8si)b); 44828a324a30b0677309a4c5d73ef5197398265e129Craig Topper} 44928a324a30b0677309a4c5d73ef5197398265e129Craig Topper 450231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 451735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper_mm256_or_si256(__m256i a, __m256i b) 452735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper{ 453735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper return a | b; 454735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper} 455735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper 456735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 457cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sad_epu8(__m256i a, __m256i b) 458cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 459cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return __builtin_ia32_psadbw256((__v32qi)a, (__v32qi)b); 460cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 461cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 462cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 463cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_shuffle_epi8(__m256i a, __m256i b) 464cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 465cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_pshufb256((__v32qi)a, (__v32qi)b); 466cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 467cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 468cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper#define _mm256_shuffle_epi32(a, imm) __extension__ ({ \ 469cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper __m256i __a = (a); \ 470cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)_mm256_set1_epi32(0), \ 471cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (imm) & 0x3, ((imm) & 0xc) >> 2, \ 472cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \ 473cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0x03) >> 0), \ 474cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0x0c) >> 2), \ 475cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0x30) >> 4), \ 476cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0xc0) >> 6)); }) 477cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 478cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper#define _mm256_shufflehi_epi16(a, imm) __extension__ ({ \ 479cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper __m256i __a = (a); \ 480cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)_mm256_set1_epi16(0), \ 481cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 0, 1, 2, 3, \ 482cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0x03) >> 0), \ 483cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0x0c) >> 2), \ 484cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0x30) >> 4), \ 485cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4 + (((imm) & 0xc0) >> 6), \ 486cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 8, 9, 10, 11, \ 487cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 12 + (((imm) & 0x03) >> 0), \ 488cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 12 + (((imm) & 0x0c) >> 2), \ 489cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 12 + (((imm) & 0x30) >> 4), \ 490cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 12 + (((imm) & 0xc0) >> 6)); }) 491cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 492cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper#define _mm256_shufflelo_epi16(a, imm) __extension__ ({ \ 493cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper __m256i __a = (a); \ 494cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)_mm256_set1_epi16(0), \ 495cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (imm) & 0x3,((imm) & 0xc) >> 2, \ 496cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \ 497cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 4, 5, 6, 7, \ 498cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 8 + (((imm) & 0x03) >> 0), \ 499cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 8 + (((imm) & 0x0c) >> 2), \ 500cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 8 + (((imm) & 0x30) >> 4), \ 501cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 8 + (((imm) & 0xc0) >> 6), \ 502cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 12, 13, 14, 15); }) 503cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 504cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 505cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sign_epi8(__m256i a, __m256i b) 506cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 507cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_psignb256((__v32qi)a, (__v32qi)b); 508cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 509cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 510cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 511cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sign_epi16(__m256i a, __m256i b) 512cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 513cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_psignw256((__v16hi)a, (__v16hi)b); 514cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 515cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 516cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 517cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sign_epi32(__m256i a, __m256i b) 518cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 519cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_psignd256((__v8si)a, (__v8si)b); 520cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 521cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 522cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper#define _mm256_slli_si256(a, count) __extension__ ({ \ 523cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper __m256i __a = (a); \ 524cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (__m256i)__builtin_ia32_pslldqi256(__a, (count)*8); }) 525cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 526cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 527cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_slli_epi16(__m256i a, int count) 528cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 529cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_psllwi256((__v16hi)a, count); 530cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 531cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 532cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 533cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sll_epi16(__m256i a, __m128i count) 534cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 535cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_psllw256((__v16hi)a, (__v8hi)count); 536cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 537cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 538cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 539cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_slli_epi32(__m256i a, int count) 540cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 541cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_pslldi256((__v8si)a, count); 542cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 543cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 544cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 545cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sll_epi32(__m256i a, __m128i count) 546cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 547cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_pslld256((__v8si)a, (__v4si)count); 548cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 549cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 550cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 551cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_slli_epi64(__m256i a, int count) 552cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 553cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return __builtin_ia32_psllqi256(a, count); 554cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 555cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 556cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 557cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sll_epi64(__m256i a, __m128i count) 558cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 559cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return __builtin_ia32_psllq256(a, count); 560cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 561cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 562cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 563cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_srai_epi16(__m256i a, int count) 564cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 565cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_psrawi256((__v16hi)a, count); 566cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 567cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 568cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 569cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sra_epi16(__m256i a, __m128i count) 570cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 571cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_psraw256((__v16hi)a, (__v8hi)count); 572cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 573cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 574cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 575cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_srai_epi32(__m256i a, int count) 576cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 577cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_psradi256((__v8si)a, count); 578cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 579cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 580cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 581cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sra_epi32(__m256i a, __m128i count) 582cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 583cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_psrad256((__v8si)a, (__v4si)count); 584cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 585cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 586cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper#define _mm256_srli_si256(a, count) __extension__ ({ \ 587cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper __m256i __a = (a); \ 588cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper (__m256i)__builtin_ia32_psrldqi256(__a, (count)*8); }) 589cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 590cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 591cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_srli_epi16(__m256i a, int count) 592cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 593cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_psrlwi256((__v16hi)a, count); 594cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 595cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 596cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 597cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_srl_epi16(__m256i a, __m128i count) 598cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 599cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_psrlw256((__v16hi)a, (__v8hi)count); 600cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 601cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 602cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 603cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_srli_epi32(__m256i a, int count) 604cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 605cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_psrldi256((__v8si)a, count); 606cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 607cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 608cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 609cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_srl_epi32(__m256i a, __m128i count) 610cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 611cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return (__m256i)__builtin_ia32_psrld256((__v8si)a, (__v4si)count); 612cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 613cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 614cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 615cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_srli_epi64(__m256i a, int count) 616cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 617cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return __builtin_ia32_psrlqi256(a, count); 618cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 619cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 620cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 621cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_srl_epi64(__m256i a, __m128i count) 622cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{ 623cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper return __builtin_ia32_psrlq256(a, count); 624cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper} 625cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper 626cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 627925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_sub_epi8(__m256i a, __m256i b) 628925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 629925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)((__v32qi)a - (__v32qi)b); 630925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 631925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 632925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 633925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_sub_epi16(__m256i a, __m256i b) 634925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 635925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)((__v16hi)a - (__v16hi)b); 636925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 637925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 638925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 639925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_sub_epi32(__m256i a, __m256i b) 640925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 641925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)((__v8si)a - (__v8si)b); 642925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 643925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 644925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 645925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_sub_epi64(__m256i a, __m256i b) 646925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 647925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return a - b; 648925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 6499c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 6509c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6519c2ffd803af03f1728423d0d73ff87d988642633Craig Topper_mm256_subs_epi8(__m256i a, __m256i b) 6529c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 6539c2ffd803af03f1728423d0d73ff87d988642633Craig Topper return (__m256i)__builtin_ia32_psubsb256((__v32qi)a, (__v32qi)b); 6549c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 6559c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 6569c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6579c2ffd803af03f1728423d0d73ff87d988642633Craig Topper_mm256_subs_epi16(__m256i a, __m256i b) 6589c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 6599c2ffd803af03f1728423d0d73ff87d988642633Craig Topper return (__m256i)__builtin_ia32_psubsw256((__v16hi)a, (__v16hi)b); 6609c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 6619c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 6629c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6639c2ffd803af03f1728423d0d73ff87d988642633Craig Topper_mm256_subs_epu8(__m256i a, __m256i b) 6649c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 6659c2ffd803af03f1728423d0d73ff87d988642633Craig Topper return (__m256i)__builtin_ia32_psubusb256((__v32qi)a, (__v32qi)b); 6669c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 6679c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 6689c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 6699c2ffd803af03f1728423d0d73ff87d988642633Craig Topper_mm256_subs_epu16(__m256i a, __m256i b) 6709c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{ 6719c2ffd803af03f1728423d0d73ff87d988642633Craig Topper return (__m256i)__builtin_ia32_psubusw256((__v16hi)a, (__v16hi)b); 6729c2ffd803af03f1728423d0d73ff87d988642633Craig Topper} 6739c2ffd803af03f1728423d0d73ff87d988642633Craig Topper 674735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 675735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper_mm256_xor_si256(__m256i a, __m256i b) 676735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper{ 677735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper return a ^ b; 678735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper} 679