/*===---- avx2intrin.h - AVX2 intrinsics -----------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper
24925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper#ifndef __IMMINTRIN_H
25925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper#error "Never use <avx2intrin.h> directly; include <immintrin.h> instead."
26925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper#endif
27925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper
/* AVX2 Multiple Packed Sums of Absolute Difference (256-bit MPSADBW).
 *
 * X and Y are explicitly viewed as byte vectors (__v32qi) and the builtin's
 * result is cast back to __m256i, the same pattern the other byte-oriented
 * wrappers in this header use, so the macro no longer depends on implicit
 * conversions between differently-shaped 256-bit vector types.  M is passed
 * to the builtin unchanged.  The whole expansion is parenthesized so it
 * behaves as a single operand wherever it is used. */
#define _mm256_mpsadbw_epu8(X, Y, M) \
  ((__m256i)__builtin_ia32_mpsadbw256((__v32qi)(X), (__v32qi)(Y), (M)))
30925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper
31925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
32925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_abs_epi8(__m256i a)
33925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{
34925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper    return (__m256i)__builtin_ia32_pabsb256((__v32qi)a);
35925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper}
36925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper
37925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
38925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_abs_epi16(__m256i a)
39925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{
40925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper    return (__m256i)__builtin_ia32_pabsw256((__v16hi)a);
41925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper}
42925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper
43925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
44925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_abs_epi32(__m256i a)
45925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{
46925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper    return (__m256i)__builtin_ia32_pabsd256((__v8si)a);
47925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper}
48925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper
49925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
50925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_packs_epi16(__m256i a, __m256i b)
51925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{
52925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper  return (__m256i)__builtin_ia32_packsswb256((__v16hi)a, (__v16hi)b);
53925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper}
54925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper
55925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
56925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_packs_epi32(__m256i a, __m256i b)
57925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{
58925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper  return (__m256i)__builtin_ia32_packssdw256((__v8si)a, (__v8si)b);
59925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper}
60925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper
61925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
62925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_packus_epi16(__m256i a, __m256i b)
63925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{
64925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper  return (__m256i)__builtin_ia32_packuswb256((__v16hi)a, (__v16hi)b);
65925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper}
66925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper
67925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
68925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_packus_epi32(__m256i __V1, __m256i __V2)
69925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{
70925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper  return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2);
71925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper}
72925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper
73925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
74925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_add_epi8(__m256i a, __m256i b)
75925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{
76925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper  return (__m256i)((__v32qi)a + (__v32qi)b);
77925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper}
78925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper
79925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
80925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_add_epi16(__m256i a, __m256i b)
81925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{
82925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper  return (__m256i)((__v16hi)a + (__v16hi)b);
83925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper}
84925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper
85925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
86925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_add_epi32(__m256i a, __m256i b)
87925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{
88925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper  return (__m256i)((__v8si)a + (__v8si)b);
89925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper}
90925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper
91925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
92925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_add_epi64(__m256i a, __m256i b)
93925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{
94925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper  return a + b;
95925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper}
96925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper
97925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
989c2ffd803af03f1728423d0d73ff87d988642633Craig Topper_mm256_adds_epi8(__m256i a, __m256i b)
999c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{
1009c2ffd803af03f1728423d0d73ff87d988642633Craig Topper  return (__m256i)__builtin_ia32_paddsb256((__v32qi)a, (__v32qi)b);
1019c2ffd803af03f1728423d0d73ff87d988642633Craig Topper}
1029c2ffd803af03f1728423d0d73ff87d988642633Craig Topper
1039c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
1049c2ffd803af03f1728423d0d73ff87d988642633Craig Topper_mm256_adds_epi16(__m256i a, __m256i b)
1059c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{
1069c2ffd803af03f1728423d0d73ff87d988642633Craig Topper  return (__m256i)__builtin_ia32_paddsw256((__v16hi)a, (__v16hi)b);
1079c2ffd803af03f1728423d0d73ff87d988642633Craig Topper}
1089c2ffd803af03f1728423d0d73ff87d988642633Craig Topper
1099c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
1109c2ffd803af03f1728423d0d73ff87d988642633Craig Topper_mm256_adds_epu8(__m256i a, __m256i b)
1119c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{
1129c2ffd803af03f1728423d0d73ff87d988642633Craig Topper  return (__m256i)__builtin_ia32_paddusb256((__v32qi)a, (__v32qi)b);
1139c2ffd803af03f1728423d0d73ff87d988642633Craig Topper}
1149c2ffd803af03f1728423d0d73ff87d988642633Craig Topper
1159c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
1169c2ffd803af03f1728423d0d73ff87d988642633Craig Topper_mm256_adds_epu16(__m256i a, __m256i b)
1179c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{
1189c2ffd803af03f1728423d0d73ff87d988642633Craig Topper  return (__m256i)__builtin_ia32_paddusw256((__v16hi)a, (__v16hi)b);
1199c2ffd803af03f1728423d0d73ff87d988642633Craig Topper}
1209c2ffd803af03f1728423d0d73ff87d988642633Craig Topper
/* Byte-wise align-right of a and b through __builtin_ia32_palignr256.
 * Implemented as a macro (statement expression) so that n reaches the builtin
 * as written by the caller.  a and b are each evaluated exactly once, in that
 * order, into __m256i temporaries before being viewed as __v32qi. */
#define _mm256_alignr_epi8(a, b, n) __extension__ ({ \
  __m256i __src1 = (a); \
  __m256i __src2 = (b); \
  (__m256i)__builtin_ia32_palignr256((__v32qi)__src1, \
                                     (__v32qi)__src2, (n)); })
1259c2ffd803af03f1728423d0d73ff87d988642633Craig Topper
1269c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
127735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper_mm256_and_si256(__m256i a, __m256i b)
128735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper{
129735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper  return a & b;
130735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper}
131735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper
132735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
133735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper_mm256_andnot_si256(__m256i a, __m256i b)
134735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper{
135735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper  return ~a & b;
136735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper}
137735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper
138735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
1394c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_avg_epu8(__m256i a, __m256i b)
1404c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{
1414c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper  return (__m256i)__builtin_ia32_pavgb256((__v32qi)a, (__v32qi)b);
1424c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper}
1434c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper
1444c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
1454c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_avg_epu16(__m256i a, __m256i b)
1464c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{
1474c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper  return (__m256i)__builtin_ia32_pavgw256((__v16hi)a, (__v16hi)b);
1484c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper}
1494c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper
1504c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
1514c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)
1524c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{
1534c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper  return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2,
1544c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper                                              (__v32qi)__M);
1554c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper}
1564c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper
/* Blend of the 16-bit elements of V1 and V2 selected by M, performed by
 * __builtin_ia32_pblendw256.  Implemented as a macro (statement expression)
 * so that M reaches the builtin as written by the caller.  V1 and V2 are each
 * evaluated exactly once, in that order, into __m256i temporaries. */
#define _mm256_blend_epi16(V1, V2, M) __extension__ ({ \
  __m256i __src1 = (V1); \
  __m256i __src2 = (V2); \
  (__m256i)__builtin_ia32_pblendw256((__v16hi)__src1, \
                                     (__v16hi)__src2, (M)); })
1614c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper
1624c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
1634c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_cmpeq_epi8(__m256i a, __m256i b)
1644c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{
1654c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper  return (__m256i)((__v32qi)a == (__v32qi)b);
1664c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper}
1674c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper
1684c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
1694c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_cmpeq_epi16(__m256i a, __m256i b)
1704c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{
1714c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper  return (__m256i)((__v16hi)a == (__v16hi)b);
1724c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper}
1734c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper
1744c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
1754c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_cmpeq_epi32(__m256i a, __m256i b)
1764c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{
1774c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper  return (__m256i)((__v8si)a == (__v8si)b);
1784c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper}
1794c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper
1804c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
1814c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_cmpeq_epi64(__m256i a, __m256i b)
1824c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{
1835cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper  return (__m256i)(a == b);
1844c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper}
1854c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper
1864c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
1874c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_cmpgt_epi8(__m256i a, __m256i b)
1884c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{
1894c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper  return (__m256i)((__v32qi)a > (__v32qi)b);
1904c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper}
1914c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper
1924c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
1934c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_cmpgt_epi16(__m256i a, __m256i b)
1944c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{
1954c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper  return (__m256i)((__v16hi)a > (__v16hi)b);
1964c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper}
1974c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper
1984c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
1994c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_cmpgt_epi32(__m256i a, __m256i b)
2004c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{
2014c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper  return (__m256i)((__v8si)a > (__v8si)b);
2024c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper}
2034c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper
2044c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
2054c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper_mm256_cmpgt_epi64(__m256i a, __m256i b)
2064c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper{
2075cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper  return (__m256i)(a > b);
2084c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper}
2094c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topper
2104c07c5dfebd270b2f0660e86f056eeafdb26a4fbCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
211318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper_mm256_hadd_epi16(__m256i a, __m256i b)
212318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{
213318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper    return (__m256i)__builtin_ia32_phaddw256((__v16hi)a, (__v16hi)b);
214318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper}
215318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper
216318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
217318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper_mm256_hadd_epi32(__m256i a, __m256i b)
218318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{
219318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper    return (__m256i)__builtin_ia32_phaddd256((__v8si)a, (__v8si)b);
220318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper}
221318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper
222318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
223318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper_mm256_hadds_epi16(__m256i a, __m256i b)
224318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{
225318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper    return (__m256i)__builtin_ia32_phaddsw256((__v16hi)a, (__v16hi)b);
226318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper}
227318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper
228318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
229318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper_mm256_hsub_epi16(__m256i a, __m256i b)
230318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{
231318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper    return (__m256i)__builtin_ia32_phsubw256((__v16hi)a, (__v16hi)b);
232318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper}
233318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper
234318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
235318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper_mm256_hsub_epi32(__m256i a, __m256i b)
236318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{
237318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper    return (__m256i)__builtin_ia32_phsubd256((__v8si)a, (__v8si)b);
238318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper}
239318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper
240318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
241318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper_mm256_hsubs_epi16(__m256i a, __m256i b)
242318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper{
243318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper    return (__m256i)__builtin_ia32_phsubsw256((__v16hi)a, (__v16hi)b);
244318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper}
245318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topper
246318e460ada6e589bd864d9ecb86053cc6852cabfCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
2474a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper_mm256_maddubs_epi16(__m256i a, __m256i b)
2484a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper{
2494a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper    return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)a, (__v32qi)b);
2504a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper}
2514a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper
2524a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
2534a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper_mm256_madd_epi16(__m256i a, __m256i b)
2544a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper{
2554a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper  return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)a, (__v16hi)b);
2564a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper}
2574a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topper
2584a4f25a5a80dd594acf68c882bcdbf1a38468a45Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
259231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_max_epi8(__m256i a, __m256i b)
260231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{
261231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper  return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)a, (__v32qi)b);
262231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper}
263231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper
264231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
265231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_max_epi16(__m256i a, __m256i b)
266231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{
267231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper  return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)a, (__v16hi)b);
268231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper}
269231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper
270231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
271231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_max_epi32(__m256i a, __m256i b)
272231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{
273231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper  return (__m256i)__builtin_ia32_pmaxsd256((__v8si)a, (__v8si)b);
274231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper}
275231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper
276231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
277231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_max_epu8(__m256i a, __m256i b)
278231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{
279231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper  return (__m256i)__builtin_ia32_pmaxub256((__v32qi)a, (__v32qi)b);
280231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper}
281231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper
282231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
283231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_max_epu16(__m256i a, __m256i b)
284231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{
285231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper  return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)a, (__v16hi)b);
286231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper}
287231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper
288231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
289231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_max_epu32(__m256i a, __m256i b)
290231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{
291231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper  return (__m256i)__builtin_ia32_pmaxud256((__v8si)a, (__v8si)b);
292231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper}
293231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper
294231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
295231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_min_epi8(__m256i a, __m256i b)
296231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{
297231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper  return (__m256i)__builtin_ia32_pminsb256((__v32qi)a, (__v32qi)b);
298231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper}
299231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper
300231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
301231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_min_epi16(__m256i a, __m256i b)
302231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{
303231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper  return (__m256i)__builtin_ia32_pminsw256((__v16hi)a, (__v16hi)b);
304231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper}
305231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper
306231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
307231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_min_epi32(__m256i a, __m256i b)
308231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{
309231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper  return (__m256i)__builtin_ia32_pminsd256((__v8si)a, (__v8si)b);
310231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper}
311231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper
312231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
313231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_min_epu8(__m256i a, __m256i b)
314231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{
315231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper  return (__m256i)__builtin_ia32_pminub256((__v32qi)a, (__v32qi)b);
316231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper}
317231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper
318231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
319231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_min_epu16(__m256i a, __m256i b)
320231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{
321231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper  return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)a, (__v16hi)b);
322231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper}
323231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper
324231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
325231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_min_epu32(__m256i a, __m256i b)
326231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{
327231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper  return (__m256i)__builtin_ia32_pminud256((__v8si)a, (__v8si)b);
328231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper}
329231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper
330231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ int __attribute__((__always_inline__, __nodebug__))
331231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_movemask_epi8(__m256i a)
332231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{
333231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper  return __builtin_ia32_pmovmskb256((__v32qi)a);
334231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper}
335231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper
336231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
337231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi8_epi16(__m128i __V)
338231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{
339231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper  return (__m256i)__builtin_ia32_pmovsxbw256((__v16qi)__V);
340231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper}
341231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper
342231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
343231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi8_epi32(__m128i __V)
344231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{
345231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper  return (__m256i)__builtin_ia32_pmovsxbd256((__v16qi)__V);
346231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper}
347231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper
348231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
349231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi8_epi64(__m128i __V)
350231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{
351231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper  return (__m256i)__builtin_ia32_pmovsxbq256((__v16qi)__V);
352231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper}
353231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper
354231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
355231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi16_epi32(__m128i __V)
356231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{
357231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper  return (__m256i)__builtin_ia32_pmovsxwd256((__v8hi)__V);
358231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper}
359231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper
360231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
361231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi16_epi64(__m128i __V)
362231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{
363231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper  return (__m256i)__builtin_ia32_pmovsxwq256((__v8hi)__V);
364231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper}
365231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper
366231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
367231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepi32_epi64(__m128i __V)
368231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{
369231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper  return (__m256i)__builtin_ia32_pmovsxdq256((__v4si)__V);
370231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper}
371231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper
372231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
373231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu8_epi16(__m128i __V)
374231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{
375231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper  return (__m256i)__builtin_ia32_pmovzxbw256((__v16qi)__V);
376231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper}
377231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper
378231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
379231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu8_epi32(__m128i __V)
380231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{
381231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper  return (__m256i)__builtin_ia32_pmovzxbd256((__v16qi)__V);
382231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper}
383231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper
384231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
385231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu8_epi64(__m128i __V)
386231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{
387231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper  return (__m256i)__builtin_ia32_pmovzxbq256((__v16qi)__V);
388231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper}
389231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper
390231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
391231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu16_epi32(__m128i __V)
392231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{
393231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper  return (__m256i)__builtin_ia32_pmovzxwd256((__v8hi)__V);
394231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper}
395231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper
396231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
397231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu16_epi64(__m128i __V)
398231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{
399231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper  return (__m256i)__builtin_ia32_pmovzxwq256((__v8hi)__V);
400231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper}
401231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper
402231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
403231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper_mm256_cvtepu32_epi64(__m128i __V)
404231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper{
405231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper  return (__m256i)__builtin_ia32_pmovzxdq256((__v4si)__V);
406231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper}
407231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topper
40828a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__  __m256i __attribute__((__always_inline__, __nodebug__))
40928a324a30b0677309a4c5d73ef5197398265e129Craig Topper_mm256_mul_epi32(__m256i a, __m256i b)
41028a324a30b0677309a4c5d73ef5197398265e129Craig Topper{
41128a324a30b0677309a4c5d73ef5197398265e129Craig Topper  return (__m256i)__builtin_ia32_pmuldq256((__v8si)a, (__v8si)b);
41228a324a30b0677309a4c5d73ef5197398265e129Craig Topper}
41328a324a30b0677309a4c5d73ef5197398265e129Craig Topper
41428a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
41528a324a30b0677309a4c5d73ef5197398265e129Craig Topper_mm256_mulhrs_epi16(__m256i a, __m256i b)
41628a324a30b0677309a4c5d73ef5197398265e129Craig Topper{
41728a324a30b0677309a4c5d73ef5197398265e129Craig Topper  return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)a, (__v16hi)b);
41828a324a30b0677309a4c5d73ef5197398265e129Craig Topper}
41928a324a30b0677309a4c5d73ef5197398265e129Craig Topper
42028a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
42128a324a30b0677309a4c5d73ef5197398265e129Craig Topper_mm256_mulhi_epu16(__m256i a, __m256i b)
42228a324a30b0677309a4c5d73ef5197398265e129Craig Topper{
42328a324a30b0677309a4c5d73ef5197398265e129Craig Topper  return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)a, (__v16hi)b);
42428a324a30b0677309a4c5d73ef5197398265e129Craig Topper}
42528a324a30b0677309a4c5d73ef5197398265e129Craig Topper
42628a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
42728a324a30b0677309a4c5d73ef5197398265e129Craig Topper_mm256_mulhi_epi16(__m256i a, __m256i b)
42828a324a30b0677309a4c5d73ef5197398265e129Craig Topper{
42928a324a30b0677309a4c5d73ef5197398265e129Craig Topper  return (__m256i)__builtin_ia32_pmulhw256((__v16hi)a, (__v16hi)b);
43028a324a30b0677309a4c5d73ef5197398265e129Craig Topper}
43128a324a30b0677309a4c5d73ef5197398265e129Craig Topper
43228a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
43328a324a30b0677309a4c5d73ef5197398265e129Craig Topper_mm256_mullo_epi16(__m256i a, __m256i b)
43428a324a30b0677309a4c5d73ef5197398265e129Craig Topper{
43528a324a30b0677309a4c5d73ef5197398265e129Craig Topper  return (__m256i)((__v16hi)a * (__v16hi)b);
43628a324a30b0677309a4c5d73ef5197398265e129Craig Topper}
43728a324a30b0677309a4c5d73ef5197398265e129Craig Topper
43828a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__  __m256i __attribute__((__always_inline__, __nodebug__))
43928a324a30b0677309a4c5d73ef5197398265e129Craig Topper_mm256_mullo_epi32 (__m256i a, __m256i b)
44028a324a30b0677309a4c5d73ef5197398265e129Craig Topper{
44128a324a30b0677309a4c5d73ef5197398265e129Craig Topper  return (__m256i)((__v8si)a * (__v8si)b);
44228a324a30b0677309a4c5d73ef5197398265e129Craig Topper}
44328a324a30b0677309a4c5d73ef5197398265e129Craig Topper
44428a324a30b0677309a4c5d73ef5197398265e129Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
44528a324a30b0677309a4c5d73ef5197398265e129Craig Topper_mm256_mul_epu32(__m256i a, __m256i b)
44628a324a30b0677309a4c5d73ef5197398265e129Craig Topper{
44728a324a30b0677309a4c5d73ef5197398265e129Craig Topper  return __builtin_ia32_pmuludq256((__v8si)a, (__v8si)b);
44828a324a30b0677309a4c5d73ef5197398265e129Craig Topper}
44928a324a30b0677309a4c5d73ef5197398265e129Craig Topper
450231f793326e3a3ad6e07949adb776f45c07f0f7bCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
451735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper_mm256_or_si256(__m256i a, __m256i b)
452735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper{
453735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper  return a | b;
454735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper}
455735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper
456735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
457cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sad_epu8(__m256i a, __m256i b)
458cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{
459cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper  return __builtin_ia32_psadbw256((__v32qi)a, (__v32qi)b);
460cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper}
461cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper
462cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
463cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_shuffle_epi8(__m256i a, __m256i b)
464cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{
465cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper  return (__m256i)__builtin_ia32_pshufb256((__v32qi)a, (__v32qi)b);
466cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper}
467cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper
/* Permutes the 32-bit elements of `a`.  The four 2-bit fields of `imm`
   pick, in order, the source element for result positions 0-3; the same
   fields offset by 4 pick the sources for positions 4-7, so each group of
   four elements is shuffled independently.  `a` is evaluated once into a
   local; `imm` is expanded several times and feeds
   __builtin_shufflevector index arguments.  The second vector operand is
   an all-zero placeholder that no index refers to.  */
#define _mm256_shuffle_epi32(a, imm) __extension__ ({ \
  __m256i __a = (a); \
  (__m256i)__builtin_shufflevector((__v8si)__a, \
                                   (__v8si)_mm256_set1_epi32(0), \
                                   ((imm) >> 0) & 0x3, \
                                   ((imm) >> 2) & 0x3, \
                                   ((imm) >> 4) & 0x3, \
                                   ((imm) >> 6) & 0x3, \
                                   4 + (((imm) >> 0) & 0x3), \
                                   4 + (((imm) >> 2) & 0x3), \
                                   4 + (((imm) >> 4) & 0x3), \
                                   4 + (((imm) >> 6) & 0x3)); })
477cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper
/* Permutes the upper four 16-bit elements of each 8-element half of `a`.
   Result positions 0-3 and 8-11 are copied through unchanged; positions
   4-7 take elements 4 + field and positions 12-15 take elements
   12 + field, where the fields are the four 2-bit groups of `imm`.  `a`
   is evaluated once into a local; `imm` is expanded several times and
   feeds __builtin_shufflevector index arguments.  The second vector
   operand is an all-zero placeholder that no index refers to.  */
#define _mm256_shufflehi_epi16(a, imm) __extension__ ({ \
  __m256i __a = (a); \
  (__m256i)__builtin_shufflevector((__v16hi)__a, \
                                   (__v16hi)_mm256_set1_epi16(0), \
                                   0, 1, 2, 3, \
                                   4 + (((imm) >> 0) & 0x3), \
                                   4 + (((imm) >> 2) & 0x3), \
                                   4 + (((imm) >> 4) & 0x3), \
                                   4 + (((imm) >> 6) & 0x3), \
                                   8, 9, 10, 11, \
                                   12 + (((imm) >> 0) & 0x3), \
                                   12 + (((imm) >> 2) & 0x3), \
                                   12 + (((imm) >> 4) & 0x3), \
                                   12 + (((imm) >> 6) & 0x3)); })
491cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper
/* Permutes the lower four 16-bit elements of each 8-element half of `a`.
   Result positions 0-3 take elements `field` and positions 8-11 take
   elements 8 + field, where the fields are the four 2-bit groups of
   `imm`; positions 4-7 and 12-15 are copied through unchanged.  `a` is
   evaluated once into a local; `imm` is expanded several times and feeds
   __builtin_shufflevector index arguments.  The second vector operand is
   an all-zero placeholder that no index refers to.  */
#define _mm256_shufflelo_epi16(a, imm) __extension__ ({ \
  __m256i __a = (a); \
  (__m256i)__builtin_shufflevector((__v16hi)__a, \
                                   (__v16hi)_mm256_set1_epi16(0), \
                                   ((imm) >> 0) & 0x3, \
                                   ((imm) >> 2) & 0x3, \
                                   ((imm) >> 4) & 0x3, \
                                   ((imm) >> 6) & 0x3, \
                                   4, 5, 6, 7, \
                                   8 + (((imm) >> 0) & 0x3), \
                                   8 + (((imm) >> 2) & 0x3), \
                                   8 + (((imm) >> 4) & 0x3), \
                                   8 + (((imm) >> 6) & 0x3), \
                                   12, 13, 14, 15); })
503cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper
504cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
505cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sign_epi8(__m256i a, __m256i b)
506cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{
507cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper    return (__m256i)__builtin_ia32_psignb256((__v32qi)a, (__v32qi)b);
508cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper}
509cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper
510cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
511cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sign_epi16(__m256i a, __m256i b)
512cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{
513cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper    return (__m256i)__builtin_ia32_psignw256((__v16hi)a, (__v16hi)b);
514cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper}
515cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper
516cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
517cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sign_epi32(__m256i a, __m256i b)
518cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{
519cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper    return (__m256i)__builtin_ia32_psignd256((__v8si)a, (__v8si)b);
520cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper}
521cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper
/* Left shift via __builtin_ia32_pslldqi256.  `a` is evaluated once into a
   local; the builtin receives `count` multiplied by 8 (presumably turning
   a byte count into a bit count -- confirm against the builtin's
   contract).  */
#define _mm256_slli_si256(a, count) __extension__ ({ \
  __m256i __a = (a); \
  (__m256i)__builtin_ia32_pslldqi256(__a, 8 * (count)); })
525cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper
526cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
527cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_slli_epi16(__m256i a, int count)
528cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{
529cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper  return (__m256i)__builtin_ia32_psllwi256((__v16hi)a, count);
530cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper}
531cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper
532cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
533cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sll_epi16(__m256i a, __m128i count)
534cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{
535cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper  return (__m256i)__builtin_ia32_psllw256((__v16hi)a, (__v8hi)count);
536cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper}
537cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper
538cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
539cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_slli_epi32(__m256i a, int count)
540cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{
541cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper  return (__m256i)__builtin_ia32_pslldi256((__v8si)a, count);
542cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper}
543cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper
544cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
545cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sll_epi32(__m256i a, __m128i count)
546cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{
547cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper  return (__m256i)__builtin_ia32_pslld256((__v8si)a, (__v4si)count);
548cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper}
549cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper
550cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
551cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_slli_epi64(__m256i a, int count)
552cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{
553cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper  return __builtin_ia32_psllqi256(a, count);
554cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper}
555cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper
556cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
557cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sll_epi64(__m256i a, __m128i count)
558cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{
559cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper  return __builtin_ia32_psllq256(a, count);
560cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper}
561cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper
562cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
563cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_srai_epi16(__m256i a, int count)
564cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{
565cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper  return (__m256i)__builtin_ia32_psrawi256((__v16hi)a, count);
566cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper}
567cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper
568cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
569cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sra_epi16(__m256i a, __m128i count)
570cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{
571cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper  return (__m256i)__builtin_ia32_psraw256((__v16hi)a, (__v8hi)count);
572cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper}
573cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper
574cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
575cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_srai_epi32(__m256i a, int count)
576cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{
577cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper  return (__m256i)__builtin_ia32_psradi256((__v8si)a, count);
578cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper}
579cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper
580cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
581cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_sra_epi32(__m256i a, __m128i count)
582cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{
583cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper  return (__m256i)__builtin_ia32_psrad256((__v8si)a, (__v4si)count);
584cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper}
585cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper
/* Right shift via __builtin_ia32_psrldqi256.  `a` is evaluated once into a
   local; the builtin receives `count` multiplied by 8 (presumably turning
   a byte count into a bit count -- confirm against the builtin's
   contract).  */
#define _mm256_srli_si256(a, count) __extension__ ({ \
  __m256i __a = (a); \
  (__m256i)__builtin_ia32_psrldqi256(__a, 8 * (count)); })
589cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper
590cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
591cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_srli_epi16(__m256i a, int count)
592cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{
593cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper  return (__m256i)__builtin_ia32_psrlwi256((__v16hi)a, count);
594cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper}
595cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper
596cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
597cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_srl_epi16(__m256i a, __m128i count)
598cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{
599cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper  return (__m256i)__builtin_ia32_psrlw256((__v16hi)a, (__v8hi)count);
600cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper}
601cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper
602cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
603cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_srli_epi32(__m256i a, int count)
604cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{
605cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper  return (__m256i)__builtin_ia32_psrldi256((__v8si)a, count);
606cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper}
607cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper
608cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
609cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_srl_epi32(__m256i a, __m128i count)
610cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{
611cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper  return (__m256i)__builtin_ia32_psrld256((__v8si)a, (__v4si)count);
612cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper}
613cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper
614cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
615cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_srli_epi64(__m256i a, int count)
616cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{
617cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper  return __builtin_ia32_psrlqi256(a, count);
618cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper}
619cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper
620cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
621cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper_mm256_srl_epi64(__m256i a, __m128i count)
622cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper{
623cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper  return __builtin_ia32_psrlq256(a, count);
624cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper}
625cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topper
626cbe627b54eaeeeac7a28725de6c9b60b4d3ab32dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
627925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_sub_epi8(__m256i a, __m256i b)
628925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{
629925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper  return (__m256i)((__v32qi)a - (__v32qi)b);
630925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper}
631925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper
632925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
633925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_sub_epi16(__m256i a, __m256i b)
634925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{
635925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper  return (__m256i)((__v16hi)a - (__v16hi)b);
636925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper}
637925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper
638925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
639925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_sub_epi32(__m256i a, __m256i b)
640925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{
641925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper  return (__m256i)((__v8si)a - (__v8si)b);
642925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper}
643925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper
644925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
645925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_sub_epi64(__m256i a, __m256i b)
646925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{
647925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper  return a - b;
648925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper}
6499c2ffd803af03f1728423d0d73ff87d988642633Craig Topper
6509c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
6519c2ffd803af03f1728423d0d73ff87d988642633Craig Topper_mm256_subs_epi8(__m256i a, __m256i b)
6529c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{
6539c2ffd803af03f1728423d0d73ff87d988642633Craig Topper  return (__m256i)__builtin_ia32_psubsb256((__v32qi)a, (__v32qi)b);
6549c2ffd803af03f1728423d0d73ff87d988642633Craig Topper}
6559c2ffd803af03f1728423d0d73ff87d988642633Craig Topper
6569c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
6579c2ffd803af03f1728423d0d73ff87d988642633Craig Topper_mm256_subs_epi16(__m256i a, __m256i b)
6589c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{
6599c2ffd803af03f1728423d0d73ff87d988642633Craig Topper  return (__m256i)__builtin_ia32_psubsw256((__v16hi)a, (__v16hi)b);
6609c2ffd803af03f1728423d0d73ff87d988642633Craig Topper}
6619c2ffd803af03f1728423d0d73ff87d988642633Craig Topper
6629c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
6639c2ffd803af03f1728423d0d73ff87d988642633Craig Topper_mm256_subs_epu8(__m256i a, __m256i b)
6649c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{
6659c2ffd803af03f1728423d0d73ff87d988642633Craig Topper  return (__m256i)__builtin_ia32_psubusb256((__v32qi)a, (__v32qi)b);
6669c2ffd803af03f1728423d0d73ff87d988642633Craig Topper}
6679c2ffd803af03f1728423d0d73ff87d988642633Craig Topper
6689c2ffd803af03f1728423d0d73ff87d988642633Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
6699c2ffd803af03f1728423d0d73ff87d988642633Craig Topper_mm256_subs_epu16(__m256i a, __m256i b)
6709c2ffd803af03f1728423d0d73ff87d988642633Craig Topper{
6719c2ffd803af03f1728423d0d73ff87d988642633Craig Topper  return (__m256i)__builtin_ia32_psubusw256((__v16hi)a, (__v16hi)b);
6729c2ffd803af03f1728423d0d73ff87d988642633Craig Topper}
6739c2ffd803af03f1728423d0d73ff87d988642633Craig Topper
674735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
6757f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper_mm256_unpackhi_epi8(__m256i a, __m256i b)
6767f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{
6777f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper  return (__m256i)__builtin_shufflevector((__v32qi)a, (__v32qi)b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31);
6787f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper}
6797f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper
6807f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
6817f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper_mm256_unpackhi_epi16(__m256i a, __m256i b)
6827f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{
6837f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper  return (__m256i)__builtin_shufflevector((__v16hi)a, (__v16hi)b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
6847f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper}
6857f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper
6867f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
6877f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper_mm256_unpackhi_epi32(__m256i a, __m256i b)
6887f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{
6897f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper  return (__m256i)__builtin_shufflevector((__v8si)a, (__v8si)b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7);
6907f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper}
6917f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper
6927f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
6937f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper_mm256_unpackhi_epi64(__m256i a, __m256i b)
6947f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{
6957f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper  return (__m256i)__builtin_shufflevector(a, b, 1, 4+1, 3, 4+3);
6967f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper}
6977f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper
6987f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
6997f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper_mm256_unpacklo_epi8(__m256i a, __m256i b)
7007f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{
7017f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper  return (__m256i)__builtin_shufflevector((__v32qi)a, (__v32qi)b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23);
7027f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper}
7037f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper
7047f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
7057f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper_mm256_unpacklo_epi16(__m256i a, __m256i b)
7067f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{
7077f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper  return (__m256i)__builtin_shufflevector((__v16hi)a, (__v16hi)b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11);
7087f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper}
7097f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper
7107f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
7117f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper_mm256_unpacklo_epi32(__m256i a, __m256i b)
7127f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{
7137f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper  return (__m256i)__builtin_shufflevector((__v8si)a, (__v8si)b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5);
7147f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper}
7157f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper
7167f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
7177f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper_mm256_unpacklo_epi64(__m256i a, __m256i b)
7187f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper{
7197f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper  return (__m256i)__builtin_shufflevector(a, b, 0, 4+0, 2, 4+2);
7207f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper}
7217f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topper
7227f16caa3c087dbc51585ec4bb6e154c10516944dCraig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
723735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper_mm256_xor_si256(__m256i a, __m256i b)
724735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper{
725735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper  return a ^ b;
726735ceaa4ccb60df5993245e645f7127bf4a4325fCraig Topper}
727ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper
728ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
729ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper_mm256_stream_load_si256(__m256i *__V)
730ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper{
731ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper  return (__m256i)__builtin_ia32_movntdqa256((__v4di *)__V);
732ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper}
733ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper
734ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topperstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
735ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper_mm_broadcastss_ps(__m128 __X)
736ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper{
737ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper  return (__m128)__builtin_ia32_vbroadcastss_ps((__v4sf)__X);
738ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper}
739ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper
740ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topperstatic __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
741ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper_mm256_broadcastss_ps(__m128 __X)
742ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper{
743ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper  return (__m256)__builtin_ia32_vbroadcastss_ps256((__v4sf)__X);
744ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper}
745ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper
746ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topperstatic __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
747ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper_mm256_broadcastsd_pd(__m128d __X)
748ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper{
749ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper  return (__m256d)__builtin_ia32_vbroadcastsd_pd256((__v2df)__X);
750ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper}
751ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper
752ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
753ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper_mm_broadcastsi128_si256(__m128i const *a)
754ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper{
755ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper  return (__m256i)__builtin_ia32_vbroadcastsi256(a);
756ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper}
757ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper
/* Statement-expression macro: copies V1 and V2 into __m128i temporaries (so
 * each is evaluated once) and passes them, viewed as __v4si, together with
 * the selector M to __builtin_ia32_pblendd128. Yields an __m128i. */
#define _mm_blend_epi32(V1, V2, M) __extension__ ({ \
  __m128i __first = (V1); \
  __m128i __second = (V2); \
  (__m128i)__builtin_ia32_pblendd128((__v4si)__first, (__v4si)__second, \
                                     (M)); })
762ee9b41d1544ad3ce4ade47e06c881b2265b17324Craig Topper
/* Statement-expression macro: copies V1 and V2 into __m256i temporaries (so
 * each is evaluated once) and passes them, viewed as __v8si, together with
 * the selector M to __builtin_ia32_pblendd256. Yields an __m256i. */
#define _mm256_blend_epi32(V1, V2, M) __extension__ ({ \
  __m256i __first = (V1); \
  __m256i __second = (V2); \
  (__m256i)__builtin_ia32_pblendd256((__v8si)__first, (__v8si)__second, \
                                     (M)); })
76734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper
76834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
76934a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm256_broadcastb_epi8(__m128i __X)
77034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{
77134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper  return (__m256i)__builtin_ia32_pbroadcastb256((__v16qi)__X);
77234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper}
77334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper
77434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
77534a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm256_broadcastw_epi16(__m128i __X)
77634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{
77734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper  return (__m256i)__builtin_ia32_pbroadcastw256((__v8hi)__X);
77834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper}
77934a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper
78034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
78134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm256_broadcastd_epi32(__m128i __X)
78234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{
78334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper  return (__m256i)__builtin_ia32_pbroadcastd256((__v4si)__X);
78434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper}
78534a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper
78634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
78734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm256_broadcastq_epi64(__m128i __X)
78834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{
78934a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper  return (__m256i)__builtin_ia32_pbroadcastq256(__X);
79034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper}
79134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper
79234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
79334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm_broadcastb_epi8(__m128i __X)
79434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{
79534a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper  return (__m128i)__builtin_ia32_pbroadcastb128((__v16qi)__X);
79634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper}
79734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper
79834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
79934a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm_broadcastw_epi16(__m128i __X)
80034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{
80134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper  return (__m128i)__builtin_ia32_pbroadcastw128((__v8hi)__X);
80234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper}
80334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper
80434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper
80534a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
80634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm_broadcastd_epi32(__m128i __X)
80734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{
80834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper  return (__m128i)__builtin_ia32_pbroadcastd128((__v4si)__X);
80934a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper}
81034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper
81134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
81234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm_broadcastq_epi64(__m128i __X)
81334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{
81434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper  return (__m128i)__builtin_ia32_pbroadcastq128(__X);
81534a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper}
81634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper
81734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
81834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm256_permutevar8x32_epi32(__m256i a, __m256i b)
81934a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{
82034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper  return (__m256i)__builtin_ia32_permvarsi256((__v8si)a, (__v8si)b);
82134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper}
82234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper
/* Statement-expression macro: rearranges the four elements of V according to
 * M. Result element k is the element of V whose index is bits [2k+1:2k] of
 * M. The second shuffle operand is a zero vector, but every index is masked
 * to 0..3, so only V is ever selected. M is expanded four times. */
#define _mm256_permute4x64_pd(V, M) __extension__ ({ \
  __m256d __src = (V); \
  __v4df __pad = (__v4df) _mm256_setzero_pd(); \
  (__m256d)__builtin_shufflevector((__v4df)__src, __pad, \
                                   (M) & 0x3, ((M) & 0xc) >> 2, \
                                   ((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); })
82834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper
82934a1da4354959522cd1721ce9ca099cc5c743f01Craig Topperstatic __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
83034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper_mm256_permutevar8x32_ps(__m256 a, __m256 b)
83134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper{
83234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper  return (__m256)__builtin_ia32_permvarsf256((__v8sf)a, (__v8sf)b);
83334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper}
83434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper
/* Statement-expression macro: rearranges the four 64-bit elements of V
 * according to M. Result element k is the element of V whose index is bits
 * [2k+1:2k] of M. The second shuffle operand is a zero vector, but every
 * index is masked to 0..3, so only V is ever selected. M is expanded four
 * times. */
#define _mm256_permute4x64_epi64(V, M) __extension__ ({ \
  __m256i __src = (V); \
  __v4di __pad = (__v4di) _mm256_setzero_si256(); \
  (__m256i)__builtin_shufflevector((__v4di)__src, __pad, \
                                   (M) & 0x3, ((M) & 0xc) >> 2, \
                                   ((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); })
84034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper
/* Statement-expression macro: copies V1 and V2 into __m256i temporaries (so
 * each is evaluated once) and passes them with the selector M to
 * __builtin_ia32_permti256. Yields an __m256i. */
#define _mm256_permute2x128_si256(V1, V2, M) __extension__ ({ \
  __m256i __first = (V1); \
  __m256i __second = (V2); \
  (__m256i)__builtin_ia32_permti256(__first, __second, (M)); })
8455cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper
/* Statement-expression macro: copies A into an __m256i temporary and passes
 * it with the selector O to __builtin_ia32_extract128i256. Yields an
 * __m128i. */
#define _mm256_extracti128_si256(A, O) __extension__ ({ \
  __m256i __src = (A); \
  (__m128i)__builtin_ia32_extract128i256(__src, (O)); })
8495cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper
/* Statement-expression macro: copies the 256-bit V1 and the 128-bit V2 into
 * typed temporaries (so each is evaluated once) and passes them with the
 * selector O to __builtin_ia32_insert128i256. Yields an __m256i. */
#define _mm256_inserti128_si256(V1, V2, O) __extension__ ({ \
  __m256i __wide = (V1); \
  __m128i __part = (V2); \
  (__m256i)__builtin_ia32_insert128i256(__wide, __part, (O)); })
8545cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper
8555cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
8565cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_maskload_epi32(int const *__X, __m256i __M)
8575cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{
8585cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper  return (__m256i)__builtin_ia32_maskloadd256((const __v8si *)__X, (__v8si)__M);
8595cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper}
8605cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper
8615cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
8625cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_maskload_epi64(long long const *__X, __m256i __M)
8635cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{
8645cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper  return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, __M);
8655cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper}
8665cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper
8675cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
8685cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_maskload_epi32(int const *__X, __m128i __M)
8695cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{
8705cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper  return (__m128i)__builtin_ia32_maskloadd((const __v4si *)__X, (__v4si)__M);
8715cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper}
8725cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper
8735cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
8745cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_maskload_epi64(long long const *__X, __m128i __M)
8755cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{
8765cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper  return (__m128i)__builtin_ia32_maskloadq((const __v2di *)__X, (__v2di)__M);
8775cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper}
8785cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper
8795cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ void __attribute__((__always_inline__, __nodebug__))
8805cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y)
8815cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{
8825cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper  __builtin_ia32_maskstored256((__v8si *)__X, (__v8si)__M, (__v8si)__Y);
8835cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper}
8845cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper
8855cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ void __attribute__((__always_inline__, __nodebug__))
8865cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y)
8875cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{
8885cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper  __builtin_ia32_maskstoreq256((__v4di *)__X, __M, __Y);
8895cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper}
8905cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper
8915cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ void __attribute__((__always_inline__, __nodebug__))
8925cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_maskstore_epi32(int *__X, __m128i __M, __m128i __Y)
8935cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{
8945cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper  __builtin_ia32_maskstored((__v4si *)__X, (__v4si)__M, (__v4si)__Y);
8955cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper}
8965cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper
8975cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ void __attribute__((__always_inline__, __nodebug__))
8985cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y)
8995cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{
9005cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper  __builtin_ia32_maskstoreq(( __v2di *)__X, __M, __Y);
9015cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper}
9025cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper
9035cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
9045cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_sllv_epi32(__m256i __X, __m256i __Y)
9055cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{
9065cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper  return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y);
9075cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper}
9085cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper
9095cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
9105cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_sllv_epi32(__m128i __X, __m128i __Y)
9115cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{
9125cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper  return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y);
9135cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper}
9145cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper
9155cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
9165cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_sllv_epi64(__m256i __X, __m256i __Y)
9175cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{
9185cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper  return (__m256i)__builtin_ia32_psllv4di(__X, __Y);
9195cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper}
9205cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper
9215cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
9225cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_sllv_epi64(__m128i __X, __m128i __Y)
9235cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{
9245cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper  return (__m128i)__builtin_ia32_psllv2di(__X, __Y);
9255cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper}
9265cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper
9275cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
9285cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_srav_epi32(__m256i __X, __m256i __Y)
9295cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{
9305cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper  return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y);
9315cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper}
9325cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper
9335cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
9345cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_srav_epi32(__m128i __X, __m128i __Y)
9355cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{
9365cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper  return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y);
9375cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper}
9385cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper
9395cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
9405cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_srlv_epi32(__m256i __X, __m256i __Y)
9415cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{
9425cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper  return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y);
9435cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper}
9445cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper
9455cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
9465cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_srlv_epi32(__m128i __X, __m128i __Y)
9475cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{
9485cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper  return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y);
9495cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper}
9505cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper
9515cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
9525cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm256_srlv_epi64(__m256i __X, __m256i __Y)
9535cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{
9545cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper  return (__m256i)__builtin_ia32_psrlv4di(__X, __Y);
9555cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper}
9565cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper
9575cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topperstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
9585cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper_mm_srlv_epi64(__m128i __X, __m128i __Y)
9595cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper{
9605cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper  return (__m128i)__builtin_ia32_psrlv2di(__X, __Y);
9615cbd751a2f9d73248c5336140d73680fcd4669a3Craig Topper}
9625283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren
/* Masked gather, statement-expression macro. Copies the pass-through vector
 * (a), the double base pointer (m), the index vector (i) and the mask into
 * typed temporaries, so each argument is evaluated once, then forwards them
 * with the scale (s) to __builtin_ia32_gatherd_pd. Yields an __m128d. */
#define _mm_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \
  __m128d __src = (a); \
  double const *__base = (m); \
  __m128i __idx = (i); \
  __m128d __sel = (mask); \
  (__m128d)__builtin_ia32_gatherd_pd((__v2df)__src, \
             (const __v2df *)__base, (__v4si)__idx, \
             (__v2df)__sel, (s)); })
9705283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren
/* Masked gather, statement-expression macro. Copies the 256-bit pass-through
 * vector (a), the double base pointer (m), the 128-bit index vector (i) and
 * the 256-bit mask into typed temporaries, so each argument is evaluated
 * once, then forwards them with the scale (s) to
 * __builtin_ia32_gatherd_pd256. Yields an __m256d. */
#define _mm256_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \
  __m256d __src = (a); \
  double const *__base = (m); \
  __m128i __idx = (i); \
  __m256d __sel = (mask); \
  (__m256d)__builtin_ia32_gatherd_pd256((__v4df)__src, \
             (const __v4df *)__base, (__v4si)__idx, \
             (__v4df)__sel, (s)); })
9785283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren
/* Masked gather, statement-expression macro. Copies the pass-through vector
 * (a), the double base pointer (m), the index vector (i) and the mask into
 * typed temporaries, so each argument is evaluated once, then forwards them
 * with the scale (s) to __builtin_ia32_gatherq_pd; the index is passed as
 * __v2di. Yields an __m128d. */
#define _mm_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \
  __m128d __src = (a); \
  double const *__base = (m); \
  __m128i __idx = (i); \
  __m128d __sel = (mask); \
  (__m128d)__builtin_ia32_gatherq_pd((__v2df)__src, \
             (const __v2df *)__base, (__v2di)__idx, \
             (__v2df)__sel, (s)); })
9865283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren
/* Masked gather, statement-expression macro. Copies the pass-through vector
 * (a), the double base pointer (m), the index vector (i) and the mask into
 * typed temporaries, so each argument is evaluated once, then forwards them
 * with the scale (s) to __builtin_ia32_gatherq_pd256; the index is passed as
 * __v4di. Yields an __m256d. */
#define _mm256_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \
  __m256d __src = (a); \
  double const *__base = (m); \
  __m256i __idx = (i); \
  __m256d __sel = (mask); \
  (__m256d)__builtin_ia32_gatherq_pd256((__v4df)__src, \
             (const __v4df *)__base, (__v4di)__idx, \
             (__v4df)__sel, (s)); })
9945283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren
/* Masked gather, statement-expression macro. Copies the pass-through vector
 * (a), the float base pointer (m), the index vector (i) and the mask into
 * typed temporaries, so each argument is evaluated once, then forwards them
 * with the scale (s) to __builtin_ia32_gatherd_ps. Yields an __m128. */
#define _mm_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \
  __m128 __src = (a); \
  float const *__base = (m); \
  __m128i __idx = (i); \
  __m128 __sel = (mask); \
  (__m128)__builtin_ia32_gatherd_ps((__v4sf)__src, \
            (const __v4sf *)__base, (__v4si)__idx, \
            (__v4sf)__sel, (s)); })
10025283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren
/* Masked gather, statement-expression macro. Copies the pass-through vector
 * (a), the float base pointer (m), the index vector (i) and the mask into
 * typed temporaries, so each argument is evaluated once, then forwards them
 * with the scale (s) to __builtin_ia32_gatherd_ps256. Yields an __m256. */
#define _mm256_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \
  __m256 __src = (a); \
  float const *__base = (m); \
  __m256i __idx = (i); \
  __m256 __sel = (mask); \
  (__m256)__builtin_ia32_gatherd_ps256((__v8sf)__src, \
            (const __v8sf *)__base, (__v8si)__idx, \
            (__v8sf)__sel, (s)); })
10105283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren
/* Masked gather, statement-expression macro. Copies the pass-through vector
 * (a), the float base pointer (m), the index vector (i) and the mask into
 * typed temporaries, so each argument is evaluated once, then forwards them
 * with the scale (s) to __builtin_ia32_gatherq_ps; the index is passed as
 * __v2di. Yields an __m128. */
#define _mm_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \
  __m128 __src = (a); \
  float const *__base = (m); \
  __m128i __idx = (i); \
  __m128 __sel = (mask); \
  (__m128)__builtin_ia32_gatherq_ps((__v4sf)__src, \
            (const __v4sf *)__base, (__v2di)__idx, \
            (__v4sf)__sel, (s)); })
10185283c99365ec4697a5a6bb2b2505469a9aa474d5Manman Ren
/* Masked gather, statement-expression macro. The pass-through vector (a),
 * the mask and the result are 128-bit (__m128) while the index vector (i) is
 * 256-bit (__m256i, passed as __v4di). Each argument is copied into a typed
 * temporary, so it is evaluated once, then forwarded with the scale (s) to
 * __builtin_ia32_gatherq_ps256. */
#define _mm256_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \
  __m128 __src = (a); \
  float const *__base = (m); \
  __m256i __idx = (i); \
  __m128 __sel = (mask); \
  (__m128)__builtin_ia32_gatherq_ps256((__v4sf)__src, \
            (const __v4sf *)__base, (__v4di)__idx, \
            (__v4sf)__sel, (s)); })
1026c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren
/* Masked gather, statement-expression macro. Copies the pass-through vector
 * (a), the int base pointer (m), the index vector (i) and the mask into
 * typed temporaries, so each argument is evaluated once, then forwards them
 * with the scale (s) to __builtin_ia32_gatherd_d. Yields an __m128i. */
#define _mm_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \
  __m128i __src = (a); \
  int const *__base = (m); \
  __m128i __idx = (i); \
  __m128i __sel = (mask); \
  (__m128i)__builtin_ia32_gatherd_d((__v4si)__src, \
            (const __v4si *)__base, (__v4si)__idx, \
            (__v4si)__sel, (s)); })
1034c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren
/* Masked gather, statement-expression macro. Copies the pass-through vector
 * (a), the int base pointer (m), the index vector (i) and the mask into
 * typed temporaries, so each argument is evaluated once, then forwards them
 * with the scale (s) to __builtin_ia32_gatherd_d256. Yields an __m256i. */
#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \
  __m256i __src = (a); \
  int const *__base = (m); \
  __m256i __idx = (i); \
  __m256i __sel = (mask); \
  (__m256i)__builtin_ia32_gatherd_d256((__v8si)__src, \
            (const __v8si *)__base, (__v8si)__idx, \
            (__v8si)__sel, (s)); })
1042c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren
/* Masked gather, statement-expression macro. Copies the pass-through vector
 * (a), the int base pointer (m), the index vector (i) and the mask into
 * typed temporaries, so each argument is evaluated once, then forwards them
 * with the scale (s) to __builtin_ia32_gatherq_d; the index is passed as
 * __v2di. Yields an __m128i. */
#define _mm_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \
  __m128i __src = (a); \
  int const *__base = (m); \
  __m128i __idx = (i); \
  __m128i __sel = (mask); \
  (__m128i)__builtin_ia32_gatherq_d((__v4si)__src, \
            (const __v4si *)__base, (__v2di)__idx, \
            (__v4si)__sel, (s)); })
1050c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren
/* Masked gather, statement-expression macro. The pass-through vector (a),
 * the mask and the result are 128-bit (__m128i) while the index vector (i)
 * is 256-bit (__m256i, passed as __v4di). Each argument is copied into a
 * typed temporary, so it is evaluated once, then forwarded with the scale
 * (s) to __builtin_ia32_gatherq_d256. */
#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \
  __m128i __src = (a); \
  int const *__base = (m); \
  __m256i __idx = (i); \
  __m128i __sel = (mask); \
  (__m128i)__builtin_ia32_gatherq_d256((__v4si)__src, \
            (const __v4si *)__base, (__v4di)__idx, \
            (__v4si)__sel, (s)); })
1058c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren
/* Masked gather of 64-bit integers, statement-expression macro. Copies the
 * pass-through vector (a), the base pointer (m), the index vector (i) and
 * the mask into typed temporaries, so each argument is evaluated once, then
 * forwards them with the scale (s) to __builtin_ia32_gatherd_q. Yields an
 * __m128i.
 *
 * The base pointer temporary is `long long const *`, matching the 64-bit
 * elements being gathered (the pointer is handed to the builtin as
 * `const __v2di *`). Declaring it as `int const *` made a correctly typed
 * `long long const *` argument an incompatible-pointer conversion. */
#define _mm_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \
  __m128i __a = (a); \
  long long const *__m = (m); \
  __m128i __i = (i); \
  __m128i __mask = (mask); \
  (__m128i)__builtin_ia32_gatherd_q((__v2di)__a, (const __v2di *)__m, \
             (__v4si)__i, (__v2di)__mask, (s)); })
1066c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren
/* Masked gather of 64-bit integers, statement-expression macro. Copies the
 * 256-bit pass-through vector (a), the base pointer (m), the 128-bit index
 * vector (i) and the 256-bit mask into typed temporaries, so each argument
 * is evaluated once, then forwards them with the scale (s) to
 * __builtin_ia32_gatherd_q256. Yields an __m256i.
 *
 * The base pointer temporary is `long long const *`, matching the 64-bit
 * elements being gathered (the pointer is handed to the builtin as
 * `const __v4di *`). Declaring it as `int const *` made a correctly typed
 * `long long const *` argument an incompatible-pointer conversion. */
#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \
  __m256i __a = (a); \
  long long const *__m = (m); \
  __m128i __i = (i); \
  __m256i __mask = (mask); \
  (__m256i)__builtin_ia32_gatherd_q256((__v4di)__a, (const __v4di *)__m, \
             (__v4si)__i, (__v4di)__mask, (s)); })
1074c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren
/* Masked gather of 64-bit integers, statement-expression macro. Copies the
 * pass-through vector (a), the base pointer (m), the index vector (i) and
 * the mask into typed temporaries, so each argument is evaluated once, then
 * forwards them with the scale (s) to __builtin_ia32_gatherq_q. Yields an
 * __m128i.
 *
 * The base pointer temporary is `long long const *`, matching the 64-bit
 * elements being gathered (the pointer is handed to the builtin as
 * `const __v2di *`). Declaring it as `int const *` made a correctly typed
 * `long long const *` argument an incompatible-pointer conversion. */
#define _mm_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \
  __m128i __a = (a); \
  long long const *__m = (m); \
  __m128i __i = (i); \
  __m128i __mask = (mask); \
  (__m128i)__builtin_ia32_gatherq_q((__v2di)__a, (const __v2di *)__m, \
             (__v2di)__i, (__v2di)__mask, (s)); })
1082c84804a7409927813430f5bb1b56bb7b8afbcee8Manman Ren
/* Masked gather of 64-bit integers, statement-expression macro. Copies the
 * pass-through vector (a), the base pointer (m), the index vector (i) and
 * the mask into typed temporaries, so each argument is evaluated once, then
 * forwards them with the scale (s) to __builtin_ia32_gatherq_q256. Yields
 * an __m256i.
 *
 * The base pointer temporary is `long long const *`, matching the 64-bit
 * elements being gathered (the pointer is handed to the builtin as
 * `const __v4di *`). Declaring it as `int const *` made a correctly typed
 * `long long const *` argument an incompatible-pointer conversion. */
#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \
  __m256i __a = (a); \
  long long const *__m = (m); \
  __m256i __i = (i); \
  __m256i __mask = (mask); \
  (__m256i)__builtin_ia32_gatherq_q256((__v4di)__a, (const __v4di *)__m, \
             (__v4di)__i, (__v4di)__mask, (s)); })
109056c045ed5e148d3eff9b344001370b80ec14d43bManman Ren
/* Unmasked gather, statement-expression macro. Calls
 * __builtin_ia32_gatherd_pd with a zero pass-through vector, the double base
 * pointer (m), the index vector (i), a mask with every element set to
 * (double)-1, and the scale (s). Yields an __m128d.
 * NOTE(review): the -1.0 mask presumably enables every lane via its sign
 * bit -- confirm against the gather instruction's mask semantics. */
#define _mm_i32gather_pd(m, i, s) __extension__ ({ \
  double const *__base = (m); \
  __m128i __idx = (i); \
  __v2df __zero = (__v2df)_mm_setzero_pd(); \
  __v2df __all = (__v2df)_mm_set1_pd((double)(long long int)-1); \
  (__m128d)__builtin_ia32_gatherd_pd(__zero, (const __v2df *)__base, \
             (__v4si)__idx, __all, (s)); })
109756c045ed5e148d3eff9b344001370b80ec14d43bManman Ren
/* _mm256_i32gather_pd(m, i, s)
 *
 * Unmasked form: expands to a call of __builtin_ia32_gatherd_pd256 and
 * yields the result as __m256d.
 *   m - double const * base pointer
 *   i - __m128i index vector (note: 128-bit, although the result is
 *       256-bit), handed to the builtin as __v4si
 *   s - forwarded verbatim as the builtin's final operand
 * The builtin's first operand is _mm256_setzero_pd() and its mask operand is
 * _mm256_set1_pd(-1.0).  -1.0 has its sign bit set; presumably that bit is
 * what selects a lane - confirm against the instruction reference.
 * m and i are each evaluated exactly once, m first.
 */
#define _mm256_i32gather_pd(m, i, s) __extension__ ({ \
  double const *__base = (m); \
  __m128i __idx = (i); \
  __m256d __src = _mm256_setzero_pd(); \
  __m256d __msk = _mm256_set1_pd((double)(long long int)-1); \
  (__m256d)__builtin_ia32_gatherd_pd256( \
      (__v4df)__src, \
      (const __v4df *)__base, \
      (__v4si)__idx, \
      (__v4df)__msk, \
      (s)); })
110456c045ed5e148d3eff9b344001370b80ec14d43bManman Ren
/* _mm_i64gather_pd(m, i, s)
 *
 * Unmasked form: expands to a call of __builtin_ia32_gatherq_pd and yields
 * the result as __m128d.
 *   m - double const * base pointer
 *   i - __m128i index vector, handed to the builtin as __v2di
 *   s - forwarded verbatim as the builtin's final operand
 * The builtin's first operand is _mm_setzero_pd() and its mask operand is
 * _mm_set1_pd(-1.0).  -1.0 has its sign bit set; presumably that bit is what
 * selects a lane - confirm against the instruction reference.
 * m and i are each evaluated exactly once, m first.
 */
#define _mm_i64gather_pd(m, i, s) __extension__ ({ \
  double const *__base = (m); \
  __m128i __idx = (i); \
  __m128d __src = _mm_setzero_pd(); \
  __m128d __msk = _mm_set1_pd((double)(long long int)-1); \
  (__m128d)__builtin_ia32_gatherq_pd( \
      (__v2df)__src, \
      (const __v2df *)__base, \
      (__v2di)__idx, \
      (__v2df)__msk, \
      (s)); })
111156c045ed5e148d3eff9b344001370b80ec14d43bManman Ren
/* _mm256_i64gather_pd(m, i, s)
 *
 * Unmasked form: expands to a call of __builtin_ia32_gatherq_pd256 and
 * yields the result as __m256d.
 *   m - double const * base pointer
 *   i - __m256i index vector, handed to the builtin as __v4di
 *   s - forwarded verbatim as the builtin's final operand
 * The builtin's first operand is _mm256_setzero_pd() and its mask operand is
 * _mm256_set1_pd(-1.0).  -1.0 has its sign bit set; presumably that bit is
 * what selects a lane - confirm against the instruction reference.
 * m and i are each evaluated exactly once, m first.
 */
#define _mm256_i64gather_pd(m, i, s) __extension__ ({ \
  double const *__base = (m); \
  __m256i __idx = (i); \
  __m256d __src = _mm256_setzero_pd(); \
  __m256d __msk = _mm256_set1_pd((double)(long long int)-1); \
  (__m256d)__builtin_ia32_gatherq_pd256( \
      (__v4df)__src, \
      (const __v4df *)__base, \
      (__v4di)__idx, \
      (__v4df)__msk, \
      (s)); })
111856c045ed5e148d3eff9b344001370b80ec14d43bManman Ren
/* _mm_i32gather_ps(m, i, s)
 *
 * Unmasked form: expands to a call of __builtin_ia32_gatherd_ps and yields
 * the result as __m128.
 *   m - float const * base pointer
 *   i - __m128i index vector, handed to the builtin as __v4si
 *   s - forwarded verbatim as the builtin's final operand
 * The builtin's first operand is _mm_setzero_ps() and its mask operand is
 * _mm_set1_ps(-1.0f).  -1.0f has its sign bit set; presumably that bit is
 * what selects a lane - confirm against the instruction reference.
 * m and i are each evaluated exactly once, m first.
 */
#define _mm_i32gather_ps(m, i, s) __extension__ ({ \
  float const *__base = (m); \
  __m128i __idx = (i); \
  __m128 __src = _mm_setzero_ps(); \
  __m128 __msk = _mm_set1_ps((float)(int)-1); \
  (__m128)__builtin_ia32_gatherd_ps( \
      (__v4sf)__src, \
      (const __v4sf *)__base, \
      (__v4si)__idx, \
      (__v4sf)__msk, \
      (s)); })
112556c045ed5e148d3eff9b344001370b80ec14d43bManman Ren
/* _mm256_i32gather_ps(m, i, s)
 *
 * Unmasked form: expands to a call of __builtin_ia32_gatherd_ps256 and
 * yields the result as __m256.
 *   m - float const * base pointer
 *   i - __m256i index vector, handed to the builtin as __v8si
 *   s - forwarded verbatim as the builtin's final operand
 * The builtin's first operand is _mm256_setzero_ps() and its mask operand is
 * _mm256_set1_ps(-1.0f).  -1.0f has its sign bit set; presumably that bit is
 * what selects a lane - confirm against the instruction reference.
 * m and i are each evaluated exactly once, m first.
 */
#define _mm256_i32gather_ps(m, i, s) __extension__ ({ \
  float const *__base = (m); \
  __m256i __idx = (i); \
  __m256 __src = _mm256_setzero_ps(); \
  __m256 __msk = _mm256_set1_ps((float)(int)-1); \
  (__m256)__builtin_ia32_gatherd_ps256( \
      (__v8sf)__src, \
      (const __v8sf *)__base, \
      (__v8si)__idx, \
      (__v8sf)__msk, \
      (s)); })
113256c045ed5e148d3eff9b344001370b80ec14d43bManman Ren
/* _mm_i64gather_ps(m, i, s)
 *
 * Unmasked form: expands to a call of __builtin_ia32_gatherq_ps and yields
 * the result as __m128.
 *   m - float const * base pointer
 *   i - __m128i index vector, handed to the builtin as __v2di
 *   s - forwarded verbatim as the builtin's final operand
 * The builtin's first operand is _mm_setzero_ps() and its mask operand is
 * _mm_set1_ps(-1.0f).  -1.0f has its sign bit set; presumably that bit is
 * what selects a lane - confirm against the instruction reference.
 * m and i are each evaluated exactly once, m first.
 */
#define _mm_i64gather_ps(m, i, s) __extension__ ({ \
  float const *__base = (m); \
  __m128i __idx = (i); \
  __m128 __src = _mm_setzero_ps(); \
  __m128 __msk = _mm_set1_ps((float)(int)-1); \
  (__m128)__builtin_ia32_gatherq_ps( \
      (__v4sf)__src, \
      (const __v4sf *)__base, \
      (__v2di)__idx, \
      (__v4sf)__msk, \
      (s)); })
113956c045ed5e148d3eff9b344001370b80ec14d43bManman Ren
/* _mm256_i64gather_ps(m, i, s)
 *
 * Unmasked form: expands to a call of __builtin_ia32_gatherq_ps256.
 * Note the asymmetry: the index vector is 256-bit (__m256i, handed over as
 * __v4di) while the result, source and mask operands are all 128-bit
 * (__m128 / __v4sf).
 *   m - float const * base pointer
 *   i - __m256i index vector
 *   s - forwarded verbatim as the builtin's final operand
 * The builtin's first operand is _mm_setzero_ps() and its mask operand is
 * _mm_set1_ps(-1.0f).  -1.0f has its sign bit set; presumably that bit is
 * what selects a lane - confirm against the instruction reference.
 * m and i are each evaluated exactly once, m first.
 */
#define _mm256_i64gather_ps(m, i, s) __extension__ ({ \
  float const *__base = (m); \
  __m256i __idx = (i); \
  __m128 __src = _mm_setzero_ps(); \
  __m128 __msk = _mm_set1_ps((float)(int)-1); \
  (__m128)__builtin_ia32_gatherq_ps256( \
      (__v4sf)__src, \
      (const __v4sf *)__base, \
      (__v4di)__idx, \
      (__v4sf)__msk, \
      (s)); })
114656c045ed5e148d3eff9b344001370b80ec14d43bManman Ren
/* _mm_i32gather_epi32(m, i, s)
 *
 * Unmasked form: expands to a call of __builtin_ia32_gatherd_d and yields
 * the result as __m128i.
 *   m - int const * base pointer
 *   i - __m128i index vector, handed to the builtin as __v4si
 *   s - forwarded verbatim as the builtin's final operand
 * The builtin's first operand is _mm_setzero_si128() and its mask operand is
 * _mm_set1_epi32(-1).
 * m and i are each evaluated exactly once, m first.
 */
#define _mm_i32gather_epi32(m, i, s) __extension__ ({ \
  int const *__base = (m); \
  __m128i __idx = (i); \
  __m128i __src = _mm_setzero_si128(); \
  __m128i __msk = _mm_set1_epi32(-1); \
  (__m128i)__builtin_ia32_gatherd_d( \
      (__v4si)__src, \
      (const __v4si *)__base, \
      (__v4si)__idx, \
      (__v4si)__msk, \
      (s)); })
115356c045ed5e148d3eff9b344001370b80ec14d43bManman Ren
/* _mm256_i32gather_epi32(m, i, s)
 *
 * Unmasked form: expands to a call of __builtin_ia32_gatherd_d256 and yields
 * the result as __m256i.
 *   m - int const * base pointer
 *   i - __m256i index vector, handed to the builtin as __v8si
 *   s - forwarded verbatim as the builtin's final operand
 * The builtin's first operand is _mm256_setzero_si256() and its mask operand
 * is _mm256_set1_epi32(-1).
 * m and i are each evaluated exactly once, m first.
 */
#define _mm256_i32gather_epi32(m, i, s) __extension__ ({ \
  int const *__base = (m); \
  __m256i __idx = (i); \
  __m256i __src = _mm256_setzero_si256(); \
  __m256i __msk = _mm256_set1_epi32(-1); \
  (__m256i)__builtin_ia32_gatherd_d256( \
      (__v8si)__src, \
      (const __v8si *)__base, \
      (__v8si)__idx, \
      (__v8si)__msk, \
      (s)); })
116056c045ed5e148d3eff9b344001370b80ec14d43bManman Ren
/* _mm_i64gather_epi32(m, i, s)
 *
 * Unmasked form: expands to a call of __builtin_ia32_gatherq_d and yields
 * the result as __m128i.
 *   m - int const * base pointer
 *   i - __m128i index vector, handed to the builtin as __v2di
 *   s - forwarded verbatim as the builtin's final operand
 * The builtin's first operand is _mm_setzero_si128() and its mask operand is
 * _mm_set1_epi32(-1).
 * m and i are each evaluated exactly once, m first.
 */
#define _mm_i64gather_epi32(m, i, s) __extension__ ({ \
  int const *__base = (m); \
  __m128i __idx = (i); \
  __m128i __src = _mm_setzero_si128(); \
  __m128i __msk = _mm_set1_epi32(-1); \
  (__m128i)__builtin_ia32_gatherq_d( \
      (__v4si)__src, \
      (const __v4si *)__base, \
      (__v2di)__idx, \
      (__v4si)__msk, \
      (s)); })
116756c045ed5e148d3eff9b344001370b80ec14d43bManman Ren
/* _mm256_i64gather_epi32(m, i, s)
 *
 * Unmasked form: expands to a call of __builtin_ia32_gatherq_d256.
 * Note the asymmetry: the index vector is 256-bit (__m256i, handed over as
 * __v4di) while the result, source and mask operands are all 128-bit
 * (__m128i / __v4si).
 *   m - int const * base pointer
 *   i - __m256i index vector
 *   s - forwarded verbatim as the builtin's final operand
 * The builtin's first operand is _mm_setzero_si128() and its mask operand is
 * _mm_set1_epi32(-1).
 * m and i are each evaluated exactly once, m first.
 */
#define _mm256_i64gather_epi32(m, i, s) __extension__ ({ \
  int const *__base = (m); \
  __m256i __idx = (i); \
  __m128i __src = _mm_setzero_si128(); \
  __m128i __msk = _mm_set1_epi32(-1); \
  (__m128i)__builtin_ia32_gatherq_d256( \
      (__v4si)__src, \
      (const __v4si *)__base, \
      (__v4di)__idx, \
      (__v4si)__msk, \
      (s)); })
117456c045ed5e148d3eff9b344001370b80ec14d43bManman Ren
/* _mm_i32gather_epi64(m, i, s)
 *
 * Unmasked form: expands to a call of __builtin_ia32_gatherd_q and yields
 * the result as __m128i.
 *   m - base pointer; the elements are 64-bit (__v2di), so it is typed
 *       long long const * rather than int const *
 *   i - __m128i index vector, handed to the builtin as __v4si
 *   s - forwarded verbatim as the builtin's final operand (presumably the
 *       address scale, which must be a compile-time constant - confirm)
 * The builtin's first operand is _mm_setzero_si128() and its mask operand is
 * _mm_set1_epi64x(-1).
 * m and i are each evaluated exactly once, m first.
 */
#define _mm_i32gather_epi64(m, i, s) __extension__ ({ \
  long long const *__m = (m); \
  __m128i __i = (i); \
  (__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_setzero_si128(), \
             (const __v2di *)__m, (__v4si)__i, \
             (__v2di)_mm_set1_epi64x(-1), (s)); })
118156c045ed5e148d3eff9b344001370b80ec14d43bManman Ren
/* _mm256_i32gather_epi64(m, i, s)
 *
 * Unmasked form: expands to a call of __builtin_ia32_gatherd_q256 and yields
 * the result as __m256i.
 *   m - base pointer; the elements are 64-bit (__v4di), so it is typed
 *       long long const * rather than int const *
 *   i - __m128i index vector (note: 128-bit, although the result is
 *       256-bit), handed to the builtin as __v4si
 *   s - forwarded verbatim as the builtin's final operand (presumably the
 *       address scale, which must be a compile-time constant - confirm)
 * The builtin's first operand is _mm256_setzero_si256() and its mask operand
 * is _mm256_set1_epi64x(-1).
 * m and i are each evaluated exactly once, m first.
 */
#define _mm256_i32gather_epi64(m, i, s) __extension__ ({ \
  long long const *__m = (m); \
  __m128i __i = (i); \
  (__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_setzero_si256(), \
             (const __v4di *)__m, (__v4si)__i, \
             (__v4di)_mm256_set1_epi64x(-1), (s)); })
118856c045ed5e148d3eff9b344001370b80ec14d43bManman Ren
/* _mm_i64gather_epi64(m, i, s)
 *
 * Unmasked form: expands to a call of __builtin_ia32_gatherq_q and yields
 * the result as __m128i.
 *   m - base pointer; the elements are 64-bit (__v2di), so it is typed
 *       long long const * rather than int const *
 *   i - __m128i index vector, handed to the builtin as __v2di
 *   s - forwarded verbatim as the builtin's final operand (presumably the
 *       address scale, which must be a compile-time constant - confirm)
 * The builtin's first operand is _mm_setzero_si128() and its mask operand is
 * _mm_set1_epi64x(-1).
 * m and i are each evaluated exactly once, m first.
 */
#define _mm_i64gather_epi64(m, i, s) __extension__ ({ \
  long long const *__m = (m); \
  __m128i __i = (i); \
  (__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_setzero_si128(), \
             (const __v2di *)__m, (__v2di)__i, \
             (__v2di)_mm_set1_epi64x(-1), (s)); })
119556c045ed5e148d3eff9b344001370b80ec14d43bManman Ren
/* _mm256_i64gather_epi64(m, i, s)
 *
 * Unmasked form: expands to a call of __builtin_ia32_gatherq_q256 and yields
 * the result as __m256i.
 *   m - base pointer; the elements are 64-bit (__v4di), so it is typed
 *       long long const * rather than int const *
 *   i - __m256i index vector, handed to the builtin as __v4di
 *   s - forwarded verbatim as the builtin's final operand (presumably the
 *       address scale, which must be a compile-time constant - confirm)
 * The builtin's first operand is _mm256_setzero_si256() and its mask operand
 * is _mm256_set1_epi64x(-1).
 * m and i are each evaluated exactly once, m first.
 */
#define _mm256_i64gather_epi64(m, i, s) __extension__ ({ \
  long long const *__m = (m); \
  __m256i __i = (i); \
  (__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_setzero_si256(), \
             (const __v4di *)__m, (__v4di)__i, \
             (__v4di)_mm256_set1_epi64x(-1), (s)); })
1202