155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/*===---- avxintrin.h - AVX intrinsics -------------------------------------=== 255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * 355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * Permission is hereby granted, free of charge, to any person obtaining a copy 455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * of this software and associated documentation files (the "Software"), to deal 555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * in the Software without restriction, including without limitation the rights 655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * copies of the Software, and to permit persons to whom the Software is 855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * furnished to do so, subject to the following conditions: 955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * 1055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * The above copyright notice and this permission notice shall be included in 1155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * all copies or substantial portions of the Software. 1255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * 1355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 1655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 1855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 1955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * THE SOFTWARE. 2055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * 2155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes *===-----------------------------------------------------------------------=== 2255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes */ 2355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 2401b57e362a2c8abb18ba6139ca212e6c7f2288b0Benjamin Kramer#ifndef __IMMINTRIN_H 2501b57e362a2c8abb18ba6139ca212e6c7f2288b0Benjamin Kramer#error "Never use <avxintrin.h> directly; include <immintrin.h> instead." 
2601b57e362a2c8abb18ba6139ca212e6c7f2288b0Benjamin Kramer#endif 2755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 287cb4fae8da8f541f43d39896c989b06c69fd7821Richard Smith#ifndef __AVXINTRIN_H 297cb4fae8da8f541f43d39896c989b06c69fd7821Richard Smith#define __AVXINTRIN_H 307cb4fae8da8f541f43d39896c989b06c69fd7821Richard Smith 3155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopestypedef double __v4df __attribute__ ((__vector_size__ (32))); 3255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopestypedef float __v8sf __attribute__ ((__vector_size__ (32))); 3355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopestypedef long long __v4di __attribute__ ((__vector_size__ (32))); 3455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopestypedef int __v8si __attribute__ ((__vector_size__ (32))); 3555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopestypedef short __v16hi __attribute__ ((__vector_size__ (32))); 3655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopestypedef char __v32qi __attribute__ ((__vector_size__ (32))); 3755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 3855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopestypedef float __m256 __attribute__ ((__vector_size__ (32))); 3955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopestypedef double __m256d __attribute__((__vector_size__(32))); 4055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopestypedef long long __m256i __attribute__((__vector_size__(32))); 4155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 4255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Arithmetic */ 4355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 444f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_add_pd(__m256d __a, __m256d __b) 4555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 464f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return 
__a+__b; 4755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 4855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 4955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 504f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_add_ps(__m256 __a, __m256 __b) 5155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 524f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __a+__b; 5355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 5455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 5555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 564f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sub_pd(__m256d __a, __m256d __b) 5755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 584f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __a-__b; 5955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 6055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 6155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 624f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sub_ps(__m256 __a, __m256 __b) 6355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 644f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __a-__b; 6555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 6655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 6755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 684f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_addsub_pd(__m256d __a, __m256d __b) 6955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 704f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, 
(__v4df)__b); 7155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 7255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 7355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 744f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_addsub_ps(__m256 __a, __m256 __b) 7555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 764f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b); 7755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 7855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 7955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 804f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_div_pd(__m256d __a, __m256d __b) 8155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 824f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __a / __b; 8355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 8455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 8555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 864f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_div_ps(__m256 __a, __m256 __b) 8755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 884f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __a / __b; 8955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 9055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 9155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 924f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_max_pd(__m256d __a, __m256d __b) 9355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 944f918aed75d4927e88365541c7200f0b5fe5014bDavid 
Blaikie return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b); 9555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 9655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 9755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 984f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_max_ps(__m256 __a, __m256 __b) 9955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1004f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b); 10155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 10255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 10355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 1044f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_min_pd(__m256d __a, __m256d __b) 10555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1064f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b); 10755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 10855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 10955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 1104f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_min_ps(__m256 __a, __m256 __b) 11155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1124f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b); 11355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 11455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 11555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 
1164f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_mul_pd(__m256d __a, __m256d __b) 11755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1184f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __a * __b; 11955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 12055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 12155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 1224f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_mul_ps(__m256 __a, __m256 __b) 12355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1244f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __a * __b; 12555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 12655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 12755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 1284f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sqrt_pd(__m256d __a) 12955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1304f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a); 13155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 13255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 13355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 1344f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sqrt_ps(__m256 __a) 13555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1364f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a); 13755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 13855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 13955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 
__attribute__((__always_inline__, __nodebug__)) 1404f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_rsqrt_ps(__m256 __a) 14155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1424f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_rsqrtps256((__v8sf)__a); 14355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 14455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 14555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 1464f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_rcp_ps(__m256 __a) 14755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1484f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_rcpps256((__v8sf)__a); 14955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 15055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 151b8786c4dc4d5a4c72f23a2d46cac5f9bc2641926Chad Rosier#define _mm256_round_pd(V, M) __extension__ ({ \ 152b8786c4dc4d5a4c72f23a2d46cac5f9bc2641926Chad Rosier __m256d __V = (V); \ 15334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper (__m256d)__builtin_ia32_roundpd256((__v4df)__V, (M)); }) 15455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 155b8786c4dc4d5a4c72f23a2d46cac5f9bc2641926Chad Rosier#define _mm256_round_ps(V, M) __extension__ ({ \ 156b8786c4dc4d5a4c72f23a2d46cac5f9bc2641926Chad Rosier __m256 __V = (V); \ 15734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper (__m256)__builtin_ia32_roundps256((__v8sf)__V, (M)); }) 15855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 15955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#define _mm256_ceil_pd(V) _mm256_round_pd((V), _MM_FROUND_CEIL) 16055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#define _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR) 16155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#define 
_mm256_ceil_ps(V) _mm256_round_ps((V), _MM_FROUND_CEIL) 16255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR) 16355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 16455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Logical */ 16555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 1664f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_and_pd(__m256d __a, __m256d __b) 16755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1684f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)((__v4di)__a & (__v4di)__b); 16955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 17055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 17155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 1724f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_and_ps(__m256 __a, __m256 __b) 17355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1744f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)((__v8si)__a & (__v8si)__b); 17555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 17655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 17755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 1784f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_andnot_pd(__m256d __a, __m256d __b) 17955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1804f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)(~(__v4di)__a & (__v4di)__b); 18155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 18255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 18355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 
__attribute__((__always_inline__, __nodebug__)) 1844f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_andnot_ps(__m256 __a, __m256 __b) 18555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1864f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)(~(__v8si)__a & (__v8si)__b); 18755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 18855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 18955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 1904f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_or_pd(__m256d __a, __m256d __b) 19155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1924f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)((__v4di)__a | (__v4di)__b); 19355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 19455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 19555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 1964f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_or_ps(__m256 __a, __m256 __b) 19755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1984f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)((__v8si)__a | (__v8si)__b); 19955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 20055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 20155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 2024f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_xor_pd(__m256d __a, __m256d __b) 20355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 2044f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)((__v4di)__a ^ (__v4di)__b); 20555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 20655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso 
Lopes 20755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 2084f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_xor_ps(__m256 __a, __m256 __b) 20955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 2104f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)((__v8si)__a ^ (__v8si)__b); 21155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 21255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 21355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Horizontal arithmetic */ 21455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 2154f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_hadd_pd(__m256d __a, __m256d __b) 21655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 2174f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b); 21855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 21955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 22055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 2214f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_hadd_ps(__m256 __a, __m256 __b) 22255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 2234f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b); 22455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 22555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 22655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 2274f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_hsub_pd(__m256d __a, __m256d __b) 22855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 
2294f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b); 23055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 23155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 23255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 2334f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_hsub_ps(__m256 __a, __m256 __b) 23455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 2354f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b); 23655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 23755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 23855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Vector permutations */ 23955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m128d __attribute__((__always_inline__, __nodebug__)) 2404f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_permutevar_pd(__m128d __a, __m128i __c) 24155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 2424f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c); 24355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 24455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 24555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 2464f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_permutevar_pd(__m256d __a, __m256i __c) 24755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 2484f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c); 24955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 25055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 
25155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m128 __attribute__((__always_inline__, __nodebug__)) 2524f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_permutevar_ps(__m128 __a, __m128i __c) 25355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 2544f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c); 25555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 25655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 25755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 2584f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_permutevar_ps(__m256 __a, __m256i __c) 25955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 2604f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, 2614f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie (__v8si)__c); 26255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 26355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 264c17f88efa20c9e12c7e07bf02041fd1f0e65d65bChad Rosier#define _mm_permute_pd(A, C) __extension__ ({ \ 265c17f88efa20c9e12c7e07bf02041fd1f0e65d65bChad Rosier __m128d __A = (A); \ 26610c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper (__m128d)__builtin_shufflevector((__v2df)__A, (__v2df) _mm_setzero_pd(), \ 26710c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper (C) & 0x1, ((C) & 0x2) >> 1); }) 26855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 269c17f88efa20c9e12c7e07bf02041fd1f0e65d65bChad Rosier#define _mm256_permute_pd(A, C) __extension__ ({ \ 270c17f88efa20c9e12c7e07bf02041fd1f0e65d65bChad Rosier __m256d __A = (A); \ 27110c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper (__m256d)__builtin_shufflevector((__v4df)__A, (__v4df) _mm256_setzero_pd(), \ 27210c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper (C) 
& 0x1, ((C) & 0x2) >> 1, \ 27310c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper 2 + (((C) & 0x4) >> 2), \ 27410c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper 2 + (((C) & 0x8) >> 3)); }) 27555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 276d7dd7755fc5092c69f492d6f32cb0e34e63c6a53Chad Rosier#define _mm_permute_ps(A, C) __extension__ ({ \ 277d7dd7755fc5092c69f492d6f32cb0e34e63c6a53Chad Rosier __m128 __A = (A); \ 27810c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper (__m128)__builtin_shufflevector((__v4sf)__A, (__v4sf) _mm_setzero_ps(), \ 27910c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper (C) & 0x3, ((C) & 0xc) >> 2, \ 2805629646711d9c748feb1043a7df2d5ca7d1bdfc4Craig Topper ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6); }) 28155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 282d7dd7755fc5092c69f492d6f32cb0e34e63c6a53Chad Rosier#define _mm256_permute_ps(A, C) __extension__ ({ \ 283d7dd7755fc5092c69f492d6f32cb0e34e63c6a53Chad Rosier __m256 __A = (A); \ 28410c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper (__m256)__builtin_shufflevector((__v8sf)__A, (__v8sf) _mm256_setzero_ps(), \ 28510c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper (C) & 0x3, ((C) & 0xc) >> 2, \ 28610c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6, \ 28710c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper 4 + (((C) & 0x03) >> 0), \ 28810c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper 4 + (((C) & 0x0c) >> 2), \ 28910c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper 4 + (((C) & 0x30) >> 4), \ 29010c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper 4 + (((C) & 0xc0) >> 6)); }) 29155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 292c5cda1121e270548ecf258d0ed72919a5211a94eChad Rosier#define _mm256_permute2f128_pd(V1, V2, M) __extension__ ({ \ 293c5cda1121e270548ecf258d0ed72919a5211a94eChad Rosier __m256d __V1 = (V1); \ 294c5cda1121e270548ecf258d0ed72919a5211a94eChad Rosier __m256d __V2 = (V2); \ 
29549a110db4c43835681bb89671f8f73c8d8c7c28cCraig Topper (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)__V1, (__v4df)__V2, (M)); }) 29655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 297c5cda1121e270548ecf258d0ed72919a5211a94eChad Rosier#define _mm256_permute2f128_ps(V1, V2, M) __extension__ ({ \ 298c5cda1121e270548ecf258d0ed72919a5211a94eChad Rosier __m256 __V1 = (V1); \ 299c5cda1121e270548ecf258d0ed72919a5211a94eChad Rosier __m256 __V2 = (V2); \ 30049a110db4c43835681bb89671f8f73c8d8c7c28cCraig Topper (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)__V1, (__v8sf)__V2, (M)); }) 30155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 302c5cda1121e270548ecf258d0ed72919a5211a94eChad Rosier#define _mm256_permute2f128_si256(V1, V2, M) __extension__ ({ \ 303c5cda1121e270548ecf258d0ed72919a5211a94eChad Rosier __m256i __V1 = (V1); \ 304c5cda1121e270548ecf258d0ed72919a5211a94eChad Rosier __m256i __V2 = (V2); \ 30549a110db4c43835681bb89671f8f73c8d8c7c28cCraig Topper (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)__V1, (__v8si)__V2, (M)); }) 30655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 30755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Vector Blend */ 308347208968c303a9c11fe29012f6dc49680465182Eli Friedman#define _mm256_blend_pd(V1, V2, M) __extension__ ({ \ 309347208968c303a9c11fe29012f6dc49680465182Eli Friedman __m256d __V1 = (V1); \ 310347208968c303a9c11fe29012f6dc49680465182Eli Friedman __m256d __V2 = (V2); \ 31134a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper (__m256d)__builtin_ia32_blendpd256((__v4df)__V1, (__v4df)__V2, (M)); }) 31255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 313347208968c303a9c11fe29012f6dc49680465182Eli Friedman#define _mm256_blend_ps(V1, V2, M) __extension__ ({ \ 314347208968c303a9c11fe29012f6dc49680465182Eli Friedman __m256 __V1 = (V1); \ 315347208968c303a9c11fe29012f6dc49680465182Eli Friedman __m256 __V2 = (V2); \ 31634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper 
(__m256)__builtin_ia32_blendps256((__v8sf)__V1, (__v8sf)__V2, (M)); }) 31755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 31855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 3194f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c) 32055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 3214f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)__builtin_ia32_blendvpd256( 3224f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie (__v4df)__a, (__v4df)__b, (__v4df)__c); 32355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 32455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 32555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 3264f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) 32755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 32841a5fc56b5d66372318a984e30bae5a832787691David Blaikie return (__m256)__builtin_ia32_blendvps256( 3294f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie (__v8sf)__a, (__v8sf)__b, (__v8sf)__c); 33055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 33155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 33255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Vector Dot Product */ 333347208968c303a9c11fe29012f6dc49680465182Eli Friedman#define _mm256_dp_ps(V1, V2, M) __extension__ ({ \ 334347208968c303a9c11fe29012f6dc49680465182Eli Friedman __m256 __V1 = (V1); \ 335347208968c303a9c11fe29012f6dc49680465182Eli Friedman __m256 __V2 = (V2); \ 33634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper (__m256)__builtin_ia32_dpps256((__v8sf)__V1, (__v8sf)__V2, (M)); }) 33755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 33855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* 
/* Vector shuffle */

/*
 * _mm256_shuffle_ps(a, b, mask)
 *
 * Builds an 8-float result with __builtin_shufflevector, where indices 0-7
 * name elements of `a` and indices 8-15 name elements of `b`.  Every 2-bit
 * field of `mask` is used twice, once for each group of four elements:
 *
 *   result[0] = a[mask[1:0]]      result[4] = a[4 + mask[1:0]]
 *   result[1] = a[mask[3:2]]      result[5] = a[4 + mask[3:2]]
 *   result[2] = b[mask[5:4]]      result[6] = b[4 + mask[5:4]]
 *   result[3] = b[mask[7:6]]      result[7] = b[4 + mask[7:6]]
 *
 * `mask` feeds the shuffle indices directly, so it has to be an integer
 * constant expression.
 *
 * The operands are cast in place instead of being copied into temporaries
 * named __a/__b: such temporaries would capture an argument that is itself
 * spelled __a or __b (the parameter names used by the inline functions in
 * this header) and silently shuffle the wrong vector.
 */
#define _mm256_shuffle_ps(a, b, mask) __extension__ ({ \
  (__m256)__builtin_shufflevector((__v8sf)(__m256)(a), \
                                  (__v8sf)(__m256)(b), \
                                  (mask) & 0x3, \
                                  ((mask) & 0xc) >> 2, \
                                  (((mask) & 0x30) >> 4) + 8, \
                                  (((mask) & 0xc0) >> 6) + 8, \
                                  ((mask) & 0x3) + 4, \
                                  (((mask) & 0xc) >> 2) + 4, \
                                  (((mask) & 0x30) >> 4) + 12, \
                                  (((mask) & 0xc0) >> 6) + 12); })

/*
 * _mm256_shuffle_pd(a, b, mask)
 *
 * Builds a 4-double result with __builtin_shufflevector, where indices 0-3
 * name elements of `a` and indices 4-7 name elements of `b`.  One bit of
 * `mask` selects each result element:
 *
 *   result[0] = a[mask bit 0]         result[2] = a[2 + mask bit 2]
 *   result[1] = b[mask bit 1]         result[3] = b[2 + mask bit 3]
 *
 * `mask` must be an integer constant expression.  As with
 * _mm256_shuffle_ps, the operands are cast in place so that arguments named
 * __a/__b cannot be shadowed by macro-local temporaries.
 */
#define _mm256_shuffle_pd(a, b, mask) __extension__ ({ \
  (__m256d)__builtin_shufflevector((__v4df)(__m256d)(a), \
                                   (__v4df)(__m256d)(b), \
                                   (mask) & 0x1, \
                                   (((mask) & 0x2) >> 1) + 4, \
                                   (((mask) & 0x4) >> 2) + 2, \
                                   (((mask) & 0x8) >> 3) + 6); })

/* Compare */
#define _CMP_EQ_OQ    0x00 /* Equal (ordered, non-signaling) */
#define _CMP_LT_OS    0x01 /* Less-than (ordered, signaling) */
#define _CMP_LE_OS    0x02 /* Less-than-or-equal (ordered, signaling) */
#define _CMP_UNORD_Q  0x03 /* Unordered (non-signaling) */
#define _CMP_NEQ_UQ   0x04 /* Not-equal (unordered, non-signaling) */
#define _CMP_NLT_US   0x05 /* Not-less-than (unordered, signaling) */
#define _CMP_NLE_US   0x06 /* Not-less-than-or-equal (unordered, signaling) */
#define _CMP_ORD_Q    0x07 /* Ordered (non-signaling) */
#define _CMP_EQ_UQ    0x08 /* Equal (unordered, non-signaling) */
#define _CMP_NGE_US   0x09 /* Not-greater-than-or-equal (unord, signaling) */
#define _CMP_NGT_US   0x0a /* Not-greater-than (unordered, signaling) */
#define _CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */
#define _CMP_NEQ_OQ   0x0c /* Not-equal (ordered, non-signaling) */
#define _CMP_GE_OS    0x0d /* Greater-than-or-equal (ordered, signaling) */
#define _CMP_GT_OS    0x0e /* Greater-than (ordered, signaling) */
#define _CMP_TRUE_UQ  0x0f /* True (unordered, non-signaling) */
#define _CMP_EQ_OS    0x10 /* Equal (ordered, signaling) */
Lopes#define _CMP_LT_OQ 0x11 /* Less-than (ordered, non-signaling) */ 37655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#define _CMP_LE_OQ 0x12 /* Less-than-or-equal (ordered, non-signaling) */ 37755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#define _CMP_UNORD_S 0x13 /* Unordered (signaling) */ 37855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#define _CMP_NEQ_US 0x14 /* Not-equal (unordered, signaling) */ 37955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#define _CMP_NLT_UQ 0x15 /* Not-less-than (unordered, non-signaling) */ 38055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#define _CMP_NLE_UQ 0x16 /* Not-less-than-or-equal (unord, non-signaling) */ 38155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#define _CMP_ORD_S 0x17 /* Ordered (signaling) */ 38255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#define _CMP_EQ_US 0x18 /* Equal (unordered, signaling) */ 38355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#define _CMP_NGE_UQ 0x19 /* Not-greater-than-or-equal (unord, non-sign) */ 38455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#define _CMP_NGT_UQ 0x1a /* Not-greater-than (unordered, non-signaling) */ 38555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#define _CMP_FALSE_OS 0x1b /* False (ordered, signaling) */ 38655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#define _CMP_NEQ_OS 0x1c /* Not-equal (ordered, signaling) */ 38755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#define _CMP_GE_OQ 0x1d /* Greater-than-or-equal (ordered, non-signaling) */ 38855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */ 38955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */ 39055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 39132bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson#define 
_mm_cmp_pd(a, b, c) __extension__ ({ \ 39232bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson __m128d __a = (a); \ 39332bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson __m128d __b = (b); \ 39432bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson (__m128d)__builtin_ia32_cmppd((__v2df)__a, (__v2df)__b, (c)); }) 39532bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson 39632bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson#define _mm_cmp_ps(a, b, c) __extension__ ({ \ 39732bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson __m128 __a = (a); \ 39832bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson __m128 __b = (b); \ 39932bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson (__m128)__builtin_ia32_cmpps((__v4sf)__a, (__v4sf)__b, (c)); }) 40032bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson 40132bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson#define _mm256_cmp_pd(a, b, c) __extension__ ({ \ 40232bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson __m256d __a = (a); \ 40332bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson __m256d __b = (b); \ 40432bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson (__m256d)__builtin_ia32_cmppd256((__v4df)__a, (__v4df)__b, (c)); }) 40532bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson 40632bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson#define _mm256_cmp_ps(a, b, c) __extension__ ({ \ 40732bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson __m256 __a = (a); \ 40832bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson __m256 __b = (b); \ 40932bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson (__m256)__builtin_ia32_cmpps256((__v8sf)__a, (__v8sf)__b, (c)); }) 41032bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson 41132bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson#define _mm_cmp_sd(a, b, c) __extension__ ({ \ 41232bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson __m128d __a = (a); \ 41332bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson __m128d __b = (b); \ 41432bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson (__m128d)__builtin_ia32_cmpsd((__v2df)__a, (__v2df)__b, 
(c)); }) 41532bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson 41632bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson#define _mm_cmp_ss(a, b, c) __extension__ ({ \ 41732bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson __m128 __a = (a); \ 41832bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson __m128 __b = (b); \ 41932bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson (__m128)__builtin_ia32_cmpss((__v4sf)__a, (__v4sf)__b, (c)); }) 42055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 42155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Vector extract */ 4221e4faf56cd310dbd89b7d192db57c3d120bec8a2Chad Rosier#define _mm256_extractf128_pd(A, O) __extension__ ({ \ 4231e4faf56cd310dbd89b7d192db57c3d120bec8a2Chad Rosier __m256d __A = (A); \ 42434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper (__m128d)__builtin_ia32_vextractf128_pd256((__v4df)__A, (O)); }) 42555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 4261e4faf56cd310dbd89b7d192db57c3d120bec8a2Chad Rosier#define _mm256_extractf128_ps(A, O) __extension__ ({ \ 4271e4faf56cd310dbd89b7d192db57c3d120bec8a2Chad Rosier __m256 __A = (A); \ 42834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper (__m128)__builtin_ia32_vextractf128_ps256((__v8sf)__A, (O)); }) 42955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 4301e4faf56cd310dbd89b7d192db57c3d120bec8a2Chad Rosier#define _mm256_extractf128_si256(A, O) __extension__ ({ \ 4311e4faf56cd310dbd89b7d192db57c3d120bec8a2Chad Rosier __m256i __A = (A); \ 43234a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper (__m128i)__builtin_ia32_vextractf128_si256((__v8si)__A, (O)); }) 43355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 43455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 4354f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_extract_epi32(__m256i __a, int const __imm) 43655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 
4374f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v8si __b = (__v8si)__a; 4384f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __b[__imm]; 43955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 44055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 44155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 4424f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_extract_epi16(__m256i __a, int const __imm) 44355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 4444f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v16hi __b = (__v16hi)__a; 4454f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __b[__imm]; 44655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 44755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 44855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 4494f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_extract_epi8(__m256i __a, int const __imm) 45055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 4514f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v32qi __b = (__v32qi)__a; 4524f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __b[__imm]; 45355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 45455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 45555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#ifdef __x86_64__ 45655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline long long __attribute__((__always_inline__, __nodebug__)) 4574f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_extract_epi64(__m256i __a, const int __imm) 45855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 4594f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v4di __b = (__v4di)__a; 4604f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return 
__b[__imm]; 46155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 46255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#endif 46355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 46455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Vector insert */ 465b95ddf15e75a6ea27f10c410bbc7a82308b19f4bChad Rosier#define _mm256_insertf128_pd(V1, V2, O) __extension__ ({ \ 466b95ddf15e75a6ea27f10c410bbc7a82308b19f4bChad Rosier __m256d __V1 = (V1); \ 467b95ddf15e75a6ea27f10c410bbc7a82308b19f4bChad Rosier __m128d __V2 = (V2); \ 46834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper (__m256d)__builtin_ia32_vinsertf128_pd256((__v4df)__V1, (__v2df)__V2, (O)); }) 46955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 470b95ddf15e75a6ea27f10c410bbc7a82308b19f4bChad Rosier#define _mm256_insertf128_ps(V1, V2, O) __extension__ ({ \ 471b95ddf15e75a6ea27f10c410bbc7a82308b19f4bChad Rosier __m256 __V1 = (V1); \ 472b95ddf15e75a6ea27f10c410bbc7a82308b19f4bChad Rosier __m128 __V2 = (V2); \ 47334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper (__m256)__builtin_ia32_vinsertf128_ps256((__v8sf)__V1, (__v4sf)__V2, (O)); }) 47455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 475b95ddf15e75a6ea27f10c410bbc7a82308b19f4bChad Rosier#define _mm256_insertf128_si256(V1, V2, O) __extension__ ({ \ 476b95ddf15e75a6ea27f10c410bbc7a82308b19f4bChad Rosier __m256i __V1 = (V1); \ 477b95ddf15e75a6ea27f10c410bbc7a82308b19f4bChad Rosier __m128i __V2 = (V2); \ 47834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper (__m256i)__builtin_ia32_vinsertf128_si256((__v8si)__V1, (__v4si)__V2, (O)); }) 47955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 48055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 4814f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_insert_epi32(__m256i __a, int __b, int const __imm) 48255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso 
Lopes{ 4834f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v8si __c = (__v8si)__a; 4844f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __c[__imm & 7] = __b; 4854f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__c; 48655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 48755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 48855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 4894f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_insert_epi16(__m256i __a, int __b, int const __imm) 49055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 4914f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v16hi __c = (__v16hi)__a; 4924f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __c[__imm & 15] = __b; 4934f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__c; 49455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 49555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 49655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 4974f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_insert_epi8(__m256i __a, int __b, int const __imm) 49855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 4994f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v32qi __c = (__v32qi)__a; 5004f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __c[__imm & 31] = __b; 5014f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__c; 50255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 50355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 50455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#ifdef __x86_64__ 50555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 
5064f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_insert_epi64(__m256i __a, int __b, int const __imm) 50755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5084f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v4di __c = (__v4di)__a; 5094f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __c[__imm & 3] = __b; 5104f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__c; 51155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 51255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#endif 51355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 51455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Conversion */ 51555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 5164f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cvtepi32_pd(__m128i __a) 51755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5184f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)__builtin_ia32_cvtdq2pd256((__v4si) __a); 51955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 52055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 52155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 5224f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cvtepi32_ps(__m256i __a) 52355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5244f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_cvtdq2ps256((__v8si) __a); 52555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 52655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 52755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m128 __attribute__((__always_inline__, __nodebug__)) 5284f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cvtpd_ps(__m256d __a) 
52955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5304f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a); 53155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 53255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 53355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 5344f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cvtps_epi32(__m256 __a) 53555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5364f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_cvtps2dq256((__v8sf) __a); 53755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 53855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 53955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 5404f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cvtps_pd(__m128 __a) 54155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5424f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)__builtin_ia32_cvtps2pd256((__v4sf) __a); 54355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 54455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 54555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m128i __attribute__((__always_inline__, __nodebug__)) 5464f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cvttpd_epi32(__m256d __a) 54755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5484f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a); 54955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 55055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 55155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m128i 
__attribute__((__always_inline__, __nodebug__)) 5524f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cvtpd_epi32(__m256d __a) 55355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5544f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a); 55555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 55655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 55755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 5584f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cvttps_epi32(__m256 __a) 55955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5604f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a); 56155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 56255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 56355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Vector replicate */ 56455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 5654f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_movehdup_ps(__m256 __a) 56655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5674f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7); 56855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 56955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 57055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 5714f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_moveldup_ps(__m256 __a) 57255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5734f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_shufflevector(__a, __a, 0, 0, 2, 2, 4, 4, 6, 6); 
57455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 57555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 57655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 5774f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_movedup_pd(__m256d __a) 57855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5794f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_shufflevector(__a, __a, 0, 0, 2, 2); 58055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 58155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 58255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Unpack and Interleave */ 58355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 5844f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_unpackhi_pd(__m256d __a, __m256d __b) 58555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5864f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_shufflevector(__a, __b, 1, 5, 1+2, 5+2); 58755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 58855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 58955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 5904f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_unpacklo_pd(__m256d __a, __m256d __b) 59155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5924f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_shufflevector(__a, __b, 0, 4, 0+2, 4+2); 59355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 59455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 59555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 
5964f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_unpackhi_ps(__m256 __a, __m256 __b) 59755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5984f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_shufflevector(__a, __b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1); 59955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 60055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 60155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 6024f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_unpacklo_ps(__m256 __a, __m256 __b) 60355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6044f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_shufflevector(__a, __b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1); 60555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 60655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 60755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Bit Test */ 60855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6094f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_testz_pd(__m128d __a, __m128d __b) 61055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6114f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b); 61255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 61355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 61455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6154f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_testc_pd(__m128d __a, __m128d __b) 61655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6174f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b); 
61855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 61955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 62055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6214f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_testnzc_pd(__m128d __a, __m128d __b) 62255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6234f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b); 62455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 62555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 62655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6274f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_testz_ps(__m128 __a, __m128 __b) 62855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6294f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b); 63055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 63155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 63255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6334f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_testc_ps(__m128 __a, __m128 __b) 63455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6354f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b); 63655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 63755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 63855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6394f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_testnzc_ps(__m128 __a, __m128 __b) 64055db5b874416cde3f2601a717e25d0974bf02f80Bruno 
Cardoso Lopes{ 6414f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b); 64255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 64355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 64455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6454f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_testz_pd(__m256d __a, __m256d __b) 64655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6474f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b); 64855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 64955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 65055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6514f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_testc_pd(__m256d __a, __m256d __b) 65255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6534f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b); 65455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 65555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 65655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6574f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_testnzc_pd(__m256d __a, __m256d __b) 65855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6594f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b); 66055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 66155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 66255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, 
__nodebug__)) 6634f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_testz_ps(__m256 __a, __m256 __b) 66455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6654f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b); 66655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 66755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 66855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6694f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_testc_ps(__m256 __a, __m256 __b) 67055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6714f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b); 67255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 67355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 67455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6754f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_testnzc_ps(__m256 __a, __m256 __b) 67655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6774f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b); 67855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 67955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 68055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6814f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_testz_si256(__m256i __a, __m256i __b) 68255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6834f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b); 68455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 
68555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 68655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6874f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_testc_si256(__m256i __a, __m256i __b) 68855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6894f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b); 69055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 69155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 69255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6934f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_testnzc_si256(__m256i __a, __m256i __b) 69455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6954f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b); 69655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 69755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 69855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Vector extract sign mask */ 69955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 7004f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_movemask_pd(__m256d __a) 70155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 7024f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_movmskpd256((__v4df)__a); 70355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 70455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 70555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 7064f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_movemask_ps(__m256 __a) 
70755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 7084f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_movmskps256((__v8sf)__a); 70955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 71055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 7114f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie/* Vector __zero */ 71255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 71355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes_mm256_zeroall(void) 71455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 71555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes __builtin_ia32_vzeroall(); 71655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 71755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 71855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 71955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes_mm256_zeroupper(void) 72055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 72155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes __builtin_ia32_vzeroupper(); 72255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 72355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 72455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Vector load with broadcast */ 72555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m128 __attribute__((__always_inline__, __nodebug__)) 7264f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_broadcast_ss(float const *__a) 72755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 7284f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m128)__builtin_ia32_vbroadcastss(__a); 72955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 73055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 
73155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 7324f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_broadcast_sd(double const *__a) 73355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 7344f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)__builtin_ia32_vbroadcastsd256(__a); 73555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 73655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 73755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 7384f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_broadcast_ss(float const *__a) 73955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 7404f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_vbroadcastss256(__a); 74155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 74255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 74355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 7444f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_broadcast_pd(__m128d const *__a) 74555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 7464f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)__builtin_ia32_vbroadcastf128_pd256(__a); 74755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 74855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 74955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 7504f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_broadcast_ps(__m128 const *__a) 75155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 7524f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_vbroadcastf128_ps256(__a); 
75355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 75455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 75555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* SIMD load ops */ 75655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 7574f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_load_pd(double const *__p) 75855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 7594f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return *(__m256d *)__p; 76055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 76155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 76255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 7634f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_load_ps(float const *__p) 76455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 7654f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return *(__m256 *)__p; 76655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 76755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 76855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 7694f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_loadu_pd(double const *__p) 77055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 7712ee2ac2293f313dfe1c6eb7034527a92b5d23158Craig Topper struct __loadu_pd { 7724f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __m256d __v; 7732ee2ac2293f313dfe1c6eb7034527a92b5d23158Craig Topper } __attribute__((packed, may_alias)); 7744f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return ((struct __loadu_pd*)__p)->__v; 77555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 77655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 77755db5b874416cde3f2601a717e25d0974bf02f80Bruno 
Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 7784f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_loadu_ps(float const *__p) 77955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 7802ee2ac2293f313dfe1c6eb7034527a92b5d23158Craig Topper struct __loadu_ps { 7814f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __m256 __v; 7822ee2ac2293f313dfe1c6eb7034527a92b5d23158Craig Topper } __attribute__((packed, may_alias)); 7834f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return ((struct __loadu_ps*)__p)->__v; 78455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 78555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 78655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 7874f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_load_si256(__m256i const *__p) 78855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 7894f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return *__p; 79055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 79155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 79255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 7934f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_loadu_si256(__m256i const *__p) 79455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 7952ee2ac2293f313dfe1c6eb7034527a92b5d23158Craig Topper struct __loadu_si256 { 7964f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __m256i __v; 7972ee2ac2293f313dfe1c6eb7034527a92b5d23158Craig Topper } __attribute__((packed, may_alias)); 7984f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return ((struct __loadu_si256*)__p)->__v; 79955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 80055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 80155db5b874416cde3f2601a717e25d0974bf02f80Bruno 
Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 8024f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_lddqu_si256(__m256i const *__p) 80355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8044f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_lddqu256((char const *)__p); 80555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 80655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 80755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* SIMD store ops */ 80855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 8094f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_store_pd(double *__p, __m256d __a) 81055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8114f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie *(__m256d *)__p = __a; 81255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 81355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 81455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 8154f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_store_ps(float *__p, __m256 __a) 81655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8174f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie *(__m256 *)__p = __a; 81855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 81955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 82055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 8214f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_storeu_pd(double *__p, __m256d __a) 82255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8234f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_storeupd256(__p, (__v4df)__a); 
82455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 82555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 82655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 8274f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_storeu_ps(float *__p, __m256 __a) 82855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8294f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_storeups256(__p, (__v8sf)__a); 83055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 83155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 83255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 8334f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_store_si256(__m256i *__p, __m256i __a) 83455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8354f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie *__p = __a; 83655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 83755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 83855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 8394f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_storeu_si256(__m256i *__p, __m256i __a) 84055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8414f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_storedqu256((char *)__p, (__v32qi)__a); 84255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 84355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 84455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Conditional load ops */ 84555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m128d __attribute__((__always_inline__, __nodebug__)) 8464f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_maskload_pd(double const *__p, 
__m128d __m) 84755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8484f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2df)__m); 84955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 85055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 85155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 8524f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_maskload_pd(double const *__p, __m256d __m) 85355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8544f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)__builtin_ia32_maskloadpd256((const __v4df *)__p, 8554f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie (__v4df)__m); 85655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 85755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 85855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m128 __attribute__((__always_inline__, __nodebug__)) 8594f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_maskload_ps(float const *__p, __m128 __m) 86055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8614f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4sf)__m); 86255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 86355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 86455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 8654f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_maskload_ps(float const *__p, __m256 __m) 86655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8674f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_maskloadps256((const __v8sf *)__p, (__v8sf)__m); 
86855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 86955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 87055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Conditional store ops */ 87155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 8724f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_maskstore_ps(float *__p, __m256 __m, __m256 __a) 87355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8744f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8sf)__m, (__v8sf)__a); 87555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 87655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 87755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 8784f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_maskstore_pd(double *__p, __m128d __m, __m128d __a) 87955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8804f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_maskstorepd((__v2df *)__p, (__v2df)__m, (__v2df)__a); 88155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 88255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 88355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 8844f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_maskstore_pd(double *__p, __m256d __m, __m256d __a) 88555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8864f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4df)__m, (__v4df)__a); 88755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 88855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 88955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void 
__attribute__((__always_inline__, __nodebug__)) 8904f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_maskstore_ps(float *__p, __m128 __m, __m128 __a) 89155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8924f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4sf)__m, (__v4sf)__a); 89355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 89455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 89555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Cacheability support ops */ 89655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 8974f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_stream_si256(__m256i *__a, __m256i __b) 89855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8994f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_movntdq256((__v4di *)__a, (__v4di)__b); 90055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 90155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 90255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 9034f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_stream_pd(double *__a, __m256d __b) 90455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 9054f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_movntpd256(__a, (__v4df)__b); 90655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 90755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 90855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 9094f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_stream_ps(float *__p, __m256 __a) 91055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 9114f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie 
__builtin_ia32_movntps256(__p, (__v8sf)__a); 91255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 91355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 91455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Create vectors */ 91555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 9164f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_set_pd(double __a, double __b, double __c, double __d) 91755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 9184f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d){ __d, __c, __b, __a }; 91955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 92055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 92155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 9224f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_set_ps(float __a, float __b, float __c, float __d, 9234f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie float __e, float __f, float __g, float __h) 92455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 9254f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256){ __h, __g, __f, __e, __d, __c, __b, __a }; 92655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 92755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 92855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 9294f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_set_epi32(int __i0, int __i1, int __i2, int __i3, 9304f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie int __i4, int __i5, int __i6, int __i7) 93155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 9324f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)(__v8si){ __i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0 }; 
93355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 93455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 93555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 9364f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_set_epi16(short __w15, short __w14, short __w13, short __w12, 9374f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie short __w11, short __w10, short __w09, short __w08, 9384f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie short __w07, short __w06, short __w05, short __w04, 9394f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie short __w03, short __w02, short __w01, short __w00) 94055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 9414f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)(__v16hi){ __w00, __w01, __w02, __w03, __w04, __w05, __w06, 9424f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15 }; 94355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 94455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 94555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 9464f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_set_epi8(char __b31, char __b30, char __b29, char __b28, 9474f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char __b27, char __b26, char __b25, char __b24, 9484f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char __b23, char __b22, char __b21, char __b20, 9494f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char __b19, char __b18, char __b17, char __b16, 9504f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char __b15, char __b14, char __b13, char __b12, 9514f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char __b11, char __b10, char __b09, char __b08, 9524f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char 
__b07, char __b06, char __b05, char __b04, 9534f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char __b03, char __b02, char __b01, char __b00) 95455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 95555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes return (__m256i)(__v32qi){ 9564f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07, 9574f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15, 9584f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23, 9594f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31 96055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes }; 96155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 96255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 96355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 9644f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d) 96555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 9664f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)(__v4di){ __d, __c, __b, __a }; 96755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 96855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 96955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Create vectors with elements in reverse order */ 97055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 9714f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_setr_pd(double __a, double __b, double __c, double __d) 97255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 9734f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie 
return (__m256d){ __a, __b, __c, __d }; 97455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 97555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 97655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 9774f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_setr_ps(float __a, float __b, float __c, float __d, 9784f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie float __e, float __f, float __g, float __h) 97955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 9804f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256){ __a, __b, __c, __d, __e, __f, __g, __h }; 98155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 98255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 98355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 9844f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3, 9854f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie int __i4, int __i5, int __i6, int __i7) 98655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 9874f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)(__v8si){ __i0, __i1, __i2, __i3, __i4, __i5, __i6, __i7 }; 98855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 98955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 99055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 9914f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12, 9924f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie short __w11, short __w10, short __w09, short __w08, 9934f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie short __w07, short __w06, short __w05, short __w04, 
9944f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie short __w03, short __w02, short __w01, short __w00) 99555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 9964f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)(__v16hi){ __w15, __w14, __w13, __w12, __w11, __w10, __w09, 9974f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __w08, __w07, __w06, __w05, __w04, __w03, __w02, __w01, __w00 }; 99855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 99955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 100055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 10014f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28, 10024f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char __b27, char __b26, char __b25, char __b24, 10034f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char __b23, char __b22, char __b21, char __b20, 10044f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char __b19, char __b18, char __b17, char __b16, 10054f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char __b15, char __b14, char __b13, char __b12, 10064f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char __b11, char __b10, char __b09, char __b08, 10074f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char __b07, char __b06, char __b05, char __b04, 10084f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char __b03, char __b02, char __b01, char __b00) 100955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 101055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes return (__m256i)(__v32qi){ 10114f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __b31, __b30, __b29, __b28, __b27, __b26, __b25, __b24, 10124f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __b23, __b22, __b21, __b20, __b19, __b18, __b17, __b16, 10134f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie 
__b15, __b14, __b13, __b12, __b11, __b10, __b09, __b08, 10144f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __b07, __b06, __b05, __b04, __b03, __b02, __b01, __b00 }; 101555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 101655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 101755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 10184f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d) 101955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 10204f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)(__v4di){ __a, __b, __c, __d }; 102155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 102255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 102355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Create vectors with repeated elements */ 102455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 10254f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_set1_pd(double __w) 102655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 10274f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d){ __w, __w, __w, __w }; 102855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 102955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 103055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 10314f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_set1_ps(float __w) 103255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 10334f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256){ __w, __w, __w, __w, __w, __w, __w, __w }; 103455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 
103555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 103655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 10374f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_set1_epi32(int __i) 103855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 10394f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)(__v8si){ __i, __i, __i, __i, __i, __i, __i, __i }; 104055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 104155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 104255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 10434f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_set1_epi16(short __w) 104455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 10454f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)(__v16hi){ __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, 10464f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __w, __w, __w, __w, __w, __w }; 104755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 104855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 104955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 10504f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_set1_epi8(char __b) 105155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 10524f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)(__v32qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, 10534f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, 10544f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __b, __b, __b, __b, __b, __b, __b }; 105555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 
105655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 105755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 10584f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_set1_epi64x(long long __q) 105955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 10604f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)(__v4di){ __q, __q, __q, __q }; 106155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 106255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 10634f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie/* Create __zeroed vectors */ 106455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 106555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes_mm256_setzero_pd(void) 106655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 106755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes return (__m256d){ 0, 0, 0, 0 }; 106855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 106955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 107055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 107155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes_mm256_setzero_ps(void) 107255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 107355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes return (__m256){ 0, 0, 0, 0, 0, 0, 0, 0 }; 107455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 107555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 107655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 107755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes_mm256_setzero_si256(void) 107855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso 
Lopes{ 107955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes return (__m256i){ 0LL, 0LL, 0LL, 0LL }; 108055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 108155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 108255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Cast between vector types */ 108355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 1084f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner_mm256_castpd_ps(__m256d __a) 108555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1086f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner return (__m256)__a; 108755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 108855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 108955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 1090f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner_mm256_castpd_si256(__m256d __a) 109155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1092f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner return (__m256i)__a; 109355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 109455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 109555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 1096f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner_mm256_castps_pd(__m256 __a) 109755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1098f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner return (__m256d)__a; 109955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 110055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 110155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 
1102f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner_mm256_castps_si256(__m256 __a) 110355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1104f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner return (__m256i)__a; 110555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 110655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 110755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 1108f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner_mm256_castsi256_ps(__m256i __a) 110955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1110f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner return (__m256)__a; 111155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 111255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 111355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 1114f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner_mm256_castsi256_pd(__m256i __a) 111555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1116f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner return (__m256d)__a; 111755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 111855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 111955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m128d __attribute__((__always_inline__, __nodebug__)) 1120f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner_mm256_castpd256_pd128(__m256d __a) 112155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1122f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner return __builtin_shufflevector(__a, __a, 0, 1); 112355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 112455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 112555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m128 
__attribute__((__always_inline__, __nodebug__)) 1126f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner_mm256_castps256_ps128(__m256 __a) 112755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1128f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); 112955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 113055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 113155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m128i __attribute__((__always_inline__, __nodebug__)) 1132f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner_mm256_castsi256_si128(__m256i __a) 113355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1134f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner return __builtin_shufflevector(__a, __a, 0, 1); 113555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 113655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 113755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 1138f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner_mm256_castpd128_pd256(__m128d __a) 113955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1140d9d57e99793df47a8fc86cde9ecd656411f02777Craig Topper return __builtin_shufflevector(__a, __a, 0, 1, -1, -1); 114155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 114255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 114355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 1144f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner_mm256_castps128_ps256(__m128 __a) 114555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1146d9d57e99793df47a8fc86cde9ecd656411f02777Craig Topper return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1); 114755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 
114855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 114955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 1150f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner_mm256_castsi128_si256(__m128i __a) 115155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1152d9d57e99793df47a8fc86cde9ecd656411f02777Craig Topper return __builtin_shufflevector(__a, __a, 0, 1, -1, -1); 115355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 1154db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier 1155db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier/* SIMD load ops (unaligned) */ 1156db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosierstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 11574f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo) 1158db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier{ 1159db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier struct __loadu_ps { 11604f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __m128 __v; 1161db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier } __attribute__((__packed__, __may_alias__)); 1162db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier 11634f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __m256 __v256 = _mm256_castps128_ps256(((struct __loadu_ps*)__addr_lo)->__v); 11644f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return _mm256_insertf128_ps(__v256, ((struct __loadu_ps*)__addr_hi)->__v, 1); 1165db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier} 1166db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier 1167db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosierstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 11684f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo) 1169db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier{ 
1170db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier struct __loadu_pd { 11714f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __m128d __v; 1172db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier } __attribute__((__packed__, __may_alias__)); 1173db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier 11744f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __m256d __v256 = _mm256_castpd128_pd256(((struct __loadu_pd*)__addr_lo)->__v); 11754f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return _mm256_insertf128_pd(__v256, ((struct __loadu_pd*)__addr_hi)->__v, 1); 1176db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier} 1177db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier 1178db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosierstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 11794f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo) 1180db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier{ 1181db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier struct __loadu_si128 { 11824f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __m128i __v; 1183db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier } __attribute__((packed, may_alias)); 11844f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __m256i __v256 = _mm256_castsi128_si256( 11854f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie ((struct __loadu_si128*)__addr_lo)->__v); 11864f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return _mm256_insertf128_si256(__v256, 11874f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie ((struct __loadu_si128*)__addr_hi)->__v, 1); 1188db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier} 1189db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier 1190db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier/* SIMD store ops (unaligned) */ 1191db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosierstatic __inline void __attribute__((__always_inline__, __nodebug__)) 
11924f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a) 1193db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier{ 11944f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __m128 __v128; 1195db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier 11964f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v128 = _mm256_castps256_ps128(__a); 11974f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_storeups(__addr_lo, __v128); 11984f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v128 = _mm256_extractf128_ps(__a, 1); 11994f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_storeups(__addr_hi, __v128); 1200db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier} 1201db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier 1202db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosierstatic __inline void __attribute__((__always_inline__, __nodebug__)) 12034f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a) 1204db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier{ 12054f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __m128d __v128; 1206db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier 12074f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v128 = _mm256_castpd256_pd128(__a); 12084f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_storeupd(__addr_lo, __v128); 12094f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v128 = _mm256_extractf128_pd(__a, 1); 12104f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_storeupd(__addr_hi, __v128); 1211db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier} 1212db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier 1213db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosierstatic __inline void __attribute__((__always_inline__, __nodebug__)) 12144f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_storeu2_m128i(__m128i *__addr_hi, 
__m128i *__addr_lo, __m256i __a) 1215db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier{ 12164f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __m128i __v128; 1217db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier 12184f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v128 = _mm256_castsi256_si128(__a); 12194f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_storedqu((char *)__addr_lo, (__v16qi)__v128); 12204f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v128 = _mm256_extractf128_si256(__a, 1); 12214f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_storedqu((char *)__addr_hi, (__v16qi)__v128); 1222db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier} 12237cb4fae8da8f541f43d39896c989b06c69fd7821Richard Smith 12247cb4fae8da8f541f43d39896c989b06c69fd7821Richard Smith#endif /* __AVXINTRIN_H */ 1225