155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/*===---- avxintrin.h - AVX intrinsics -------------------------------------=== 255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * 355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * Permission is hereby granted, free of charge, to any person obtaining a copy 455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * of this software and associated documentation files (the "Software"), to deal 555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * in the Software without restriction, including without limitation the rights 655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * copies of the Software, and to permit persons to whom the Software is 855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * furnished to do so, subject to the following conditions: 955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * 1055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * The above copyright notice and this permission notice shall be included in 1155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * all copies or substantial portions of the Software. 1255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * 1355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 1655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 1855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 1955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * THE SOFTWARE. 2055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes * 2155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes *===-----------------------------------------------------------------------=== 2255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes */ 2355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 2401b57e362a2c8abb18ba6139ca212e6c7f2288b0Benjamin Kramer#ifndef __IMMINTRIN_H 2501b57e362a2c8abb18ba6139ca212e6c7f2288b0Benjamin Kramer#error "Never use <avxintrin.h> directly; include <immintrin.h> instead." 
2601b57e362a2c8abb18ba6139ca212e6c7f2288b0Benjamin Kramer#endif 2755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 287cb4fae8da8f541f43d39896c989b06c69fd7821Richard Smith#ifndef __AVXINTRIN_H 297cb4fae8da8f541f43d39896c989b06c69fd7821Richard Smith#define __AVXINTRIN_H 307cb4fae8da8f541f43d39896c989b06c69fd7821Richard Smith 3155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopestypedef double __v4df __attribute__ ((__vector_size__ (32))); 3255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopestypedef float __v8sf __attribute__ ((__vector_size__ (32))); 3355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopestypedef long long __v4di __attribute__ ((__vector_size__ (32))); 3455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopestypedef int __v8si __attribute__ ((__vector_size__ (32))); 3555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopestypedef short __v16hi __attribute__ ((__vector_size__ (32))); 3655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopestypedef char __v32qi __attribute__ ((__vector_size__ (32))); 3755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 3855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopestypedef float __m256 __attribute__ ((__vector_size__ (32))); 3955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopestypedef double __m256d __attribute__((__vector_size__(32))); 4055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopestypedef long long __m256i __attribute__((__vector_size__(32))); 4155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 4255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Arithmetic */ 4355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 444f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_add_pd(__m256d __a, __m256d __b) 4555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 464f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return 
__a+__b; 4755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 4855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 4955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 504f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_add_ps(__m256 __a, __m256 __b) 5155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 524f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __a+__b; 5355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 5455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 5555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 564f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sub_pd(__m256d __a, __m256d __b) 5755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 584f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __a-__b; 5955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 6055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 6155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 624f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sub_ps(__m256 __a, __m256 __b) 6355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 644f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __a-__b; 6555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 6655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 6755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 684f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_addsub_pd(__m256d __a, __m256d __b) 6955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 704f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, 
(__v4df)__b); 7155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 7255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 7355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 744f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_addsub_ps(__m256 __a, __m256 __b) 7555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 764f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b); 7755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 7855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 7955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 804f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_div_pd(__m256d __a, __m256d __b) 8155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 824f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __a / __b; 8355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 8455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 8555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 864f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_div_ps(__m256 __a, __m256 __b) 8755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 884f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __a / __b; 8955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 9055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 9155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 924f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_max_pd(__m256d __a, __m256d __b) 9355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 944f918aed75d4927e88365541c7200f0b5fe5014bDavid 
Blaikie return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b); 9555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 9655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 9755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 984f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_max_ps(__m256 __a, __m256 __b) 9955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1004f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b); 10155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 10255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 10355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 1044f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_min_pd(__m256d __a, __m256d __b) 10555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1064f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b); 10755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 10855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 10955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 1104f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_min_ps(__m256 __a, __m256 __b) 11155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1124f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b); 11355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 11455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 11555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 
1164f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_mul_pd(__m256d __a, __m256d __b) 11755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1184f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __a * __b; 11955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 12055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 12155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 1224f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_mul_ps(__m256 __a, __m256 __b) 12355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1244f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __a * __b; 12555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 12655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 12755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 1284f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sqrt_pd(__m256d __a) 12955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1304f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a); 13155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 13255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 13355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 1344f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_sqrt_ps(__m256 __a) 13555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1364f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a); 13755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 13855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 13955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 
__attribute__((__always_inline__, __nodebug__)) 1404f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_rsqrt_ps(__m256 __a) 14155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1424f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_rsqrtps256((__v8sf)__a); 14355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 14455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 14555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 1464f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_rcp_ps(__m256 __a) 14755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1484f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_rcpps256((__v8sf)__a); 14955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 15055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 151b8786c4dc4d5a4c72f23a2d46cac5f9bc2641926Chad Rosier#define _mm256_round_pd(V, M) __extension__ ({ \ 152b8786c4dc4d5a4c72f23a2d46cac5f9bc2641926Chad Rosier __m256d __V = (V); \ 15334a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper (__m256d)__builtin_ia32_roundpd256((__v4df)__V, (M)); }) 15455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 155b8786c4dc4d5a4c72f23a2d46cac5f9bc2641926Chad Rosier#define _mm256_round_ps(V, M) __extension__ ({ \ 156b8786c4dc4d5a4c72f23a2d46cac5f9bc2641926Chad Rosier __m256 __V = (V); \ 15734a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper (__m256)__builtin_ia32_roundps256((__v8sf)__V, (M)); }) 15855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 15955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#define _mm256_ceil_pd(V) _mm256_round_pd((V), _MM_FROUND_CEIL) 16055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#define _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR) 16155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#define 
_mm256_ceil_ps(V) _mm256_round_ps((V), _MM_FROUND_CEIL) 16255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR) 16355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 16455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Logical */ 16555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 1664f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_and_pd(__m256d __a, __m256d __b) 16755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1684f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)((__v4di)__a & (__v4di)__b); 16955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 17055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 17155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 1724f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_and_ps(__m256 __a, __m256 __b) 17355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1744f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)((__v8si)__a & (__v8si)__b); 17555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 17655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 17755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 1784f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_andnot_pd(__m256d __a, __m256d __b) 17955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1804f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)(~(__v4di)__a & (__v4di)__b); 18155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 18255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 18355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 
__attribute__((__always_inline__, __nodebug__)) 1844f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_andnot_ps(__m256 __a, __m256 __b) 18555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1864f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)(~(__v8si)__a & (__v8si)__b); 18755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 18855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 18955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 1904f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_or_pd(__m256d __a, __m256d __b) 19155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1924f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)((__v4di)__a | (__v4di)__b); 19355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 19455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 19555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 1964f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_or_ps(__m256 __a, __m256 __b) 19755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1984f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)((__v8si)__a | (__v8si)__b); 19955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 20055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 20155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 2024f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_xor_pd(__m256d __a, __m256d __b) 20355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 2044f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)((__v4di)__a ^ (__v4di)__b); 20555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 20655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso 
Lopes 20755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 2084f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_xor_ps(__m256 __a, __m256 __b) 20955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 2104f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)((__v8si)__a ^ (__v8si)__b); 21155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 21255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 21355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Horizontal arithmetic */ 21455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 2154f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_hadd_pd(__m256d __a, __m256d __b) 21655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 2174f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b); 21855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 21955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 22055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 2214f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_hadd_ps(__m256 __a, __m256 __b) 22255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 2234f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b); 22455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 22555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 22655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 2274f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_hsub_pd(__m256d __a, __m256d __b) 22855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 
2294f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b); 23055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 23155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 23255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 2334f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_hsub_ps(__m256 __a, __m256 __b) 23455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 2354f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b); 23655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 23755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 23855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Vector permutations */ 23955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m128d __attribute__((__always_inline__, __nodebug__)) 2404f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_permutevar_pd(__m128d __a, __m128i __c) 24155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 2424f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c); 24355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 24455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 24555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 2464f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_permutevar_pd(__m256d __a, __m256i __c) 24755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 2484f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c); 24955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 25055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 
25155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m128 __attribute__((__always_inline__, __nodebug__)) 2524f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_permutevar_ps(__m128 __a, __m128i __c) 25355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 2544f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c); 25555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 25655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 25755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 2584f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_permutevar_ps(__m256 __a, __m256i __c) 25955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 2604f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, 2614f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie (__v8si)__c); 26255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 26355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 264c17f88efa20c9e12c7e07bf02041fd1f0e65d65bChad Rosier#define _mm_permute_pd(A, C) __extension__ ({ \ 265c17f88efa20c9e12c7e07bf02041fd1f0e65d65bChad Rosier __m128d __A = (A); \ 26610c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper (__m128d)__builtin_shufflevector((__v2df)__A, (__v2df) _mm_setzero_pd(), \ 26710c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper (C) & 0x1, ((C) & 0x2) >> 1); }) 26855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 269c17f88efa20c9e12c7e07bf02041fd1f0e65d65bChad Rosier#define _mm256_permute_pd(A, C) __extension__ ({ \ 270c17f88efa20c9e12c7e07bf02041fd1f0e65d65bChad Rosier __m256d __A = (A); \ 27110c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper (__m256d)__builtin_shufflevector((__v4df)__A, (__v4df) _mm256_setzero_pd(), \ 27210c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper (C) 
& 0x1, ((C) & 0x2) >> 1, \ 27310c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper 2 + (((C) & 0x4) >> 2), \ 27410c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper 2 + (((C) & 0x8) >> 3)); }) 27555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 276d7dd7755fc5092c69f492d6f32cb0e34e63c6a53Chad Rosier#define _mm_permute_ps(A, C) __extension__ ({ \ 277d7dd7755fc5092c69f492d6f32cb0e34e63c6a53Chad Rosier __m128 __A = (A); \ 27810c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper (__m128)__builtin_shufflevector((__v4sf)__A, (__v4sf) _mm_setzero_ps(), \ 27910c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper (C) & 0x3, ((C) & 0xc) >> 2, \ 2805629646711d9c748feb1043a7df2d5ca7d1bdfc4Craig Topper ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6); }) 28155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 282d7dd7755fc5092c69f492d6f32cb0e34e63c6a53Chad Rosier#define _mm256_permute_ps(A, C) __extension__ ({ \ 283d7dd7755fc5092c69f492d6f32cb0e34e63c6a53Chad Rosier __m256 __A = (A); \ 28410c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper (__m256)__builtin_shufflevector((__v8sf)__A, (__v8sf) _mm256_setzero_ps(), \ 28510c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper (C) & 0x3, ((C) & 0xc) >> 2, \ 28610c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6, \ 28710c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper 4 + (((C) & 0x03) >> 0), \ 28810c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper 4 + (((C) & 0x0c) >> 2), \ 28910c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper 4 + (((C) & 0x30) >> 4), \ 29010c57a87d97adb0390c1dd0a69feb7862d5db4a3Craig Topper 4 + (((C) & 0xc0) >> 6)); }) 29155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 292c5cda1121e270548ecf258d0ed72919a5211a94eChad Rosier#define _mm256_permute2f128_pd(V1, V2, M) __extension__ ({ \ 293c5cda1121e270548ecf258d0ed72919a5211a94eChad Rosier __m256d __V1 = (V1); \ 294c5cda1121e270548ecf258d0ed72919a5211a94eChad Rosier __m256d __V2 = (V2); \ 
29549a110db4c43835681bb89671f8f73c8d8c7c28cCraig Topper (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)__V1, (__v4df)__V2, (M)); }) 29655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 297c5cda1121e270548ecf258d0ed72919a5211a94eChad Rosier#define _mm256_permute2f128_ps(V1, V2, M) __extension__ ({ \ 298c5cda1121e270548ecf258d0ed72919a5211a94eChad Rosier __m256 __V1 = (V1); \ 299c5cda1121e270548ecf258d0ed72919a5211a94eChad Rosier __m256 __V2 = (V2); \ 30049a110db4c43835681bb89671f8f73c8d8c7c28cCraig Topper (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)__V1, (__v8sf)__V2, (M)); }) 30155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 302c5cda1121e270548ecf258d0ed72919a5211a94eChad Rosier#define _mm256_permute2f128_si256(V1, V2, M) __extension__ ({ \ 303c5cda1121e270548ecf258d0ed72919a5211a94eChad Rosier __m256i __V1 = (V1); \ 304c5cda1121e270548ecf258d0ed72919a5211a94eChad Rosier __m256i __V2 = (V2); \ 30549a110db4c43835681bb89671f8f73c8d8c7c28cCraig Topper (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)__V1, (__v8si)__V2, (M)); }) 30655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 30755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Vector Blend */ 308347208968c303a9c11fe29012f6dc49680465182Eli Friedman#define _mm256_blend_pd(V1, V2, M) __extension__ ({ \ 309347208968c303a9c11fe29012f6dc49680465182Eli Friedman __m256d __V1 = (V1); \ 310347208968c303a9c11fe29012f6dc49680465182Eli Friedman __m256d __V2 = (V2); \ 3116bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (__m256d)__builtin_shufflevector((__v4df)__V1, (__v4df)__V2, \ 3126bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x01) ? 4 : 0), \ 3136bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x02) ? 5 : 1), \ 3146bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x04) ? 6 : 2), \ 3156bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x08) ? 
7 : 3)); }) 31655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 317347208968c303a9c11fe29012f6dc49680465182Eli Friedman#define _mm256_blend_ps(V1, V2, M) __extension__ ({ \ 318347208968c303a9c11fe29012f6dc49680465182Eli Friedman __m256 __V1 = (V1); \ 319347208968c303a9c11fe29012f6dc49680465182Eli Friedman __m256 __V2 = (V2); \ 3206bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (__m256)__builtin_shufflevector((__v8sf)__V1, (__v8sf)__V2, \ 3216bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x01) ? 8 : 0), \ 3226bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x02) ? 9 : 1), \ 3236bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x04) ? 10 : 2), \ 3246bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x08) ? 11 : 3), \ 3256bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x10) ? 12 : 4), \ 3266bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x20) ? 13 : 5), \ 3276bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x40) ? 14 : 6), \ 3286bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines (((M) & 0x80) ? 
15 : 7)); }) 32955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 33055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 3314f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c) 33255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 3334f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)__builtin_ia32_blendvpd256( 3344f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie (__v4df)__a, (__v4df)__b, (__v4df)__c); 33555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 33655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 33755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 3384f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) 33955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 34041a5fc56b5d66372318a984e30bae5a832787691David Blaikie return (__m256)__builtin_ia32_blendvps256( 3414f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie (__v8sf)__a, (__v8sf)__b, (__v8sf)__c); 34255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 34355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 34455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Vector Dot Product */ 345347208968c303a9c11fe29012f6dc49680465182Eli Friedman#define _mm256_dp_ps(V1, V2, M) __extension__ ({ \ 346347208968c303a9c11fe29012f6dc49680465182Eli Friedman __m256 __V1 = (V1); \ 347347208968c303a9c11fe29012f6dc49680465182Eli Friedman __m256 __V2 = (V2); \ 34834a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper (__m256)__builtin_ia32_dpps256((__v8sf)__V1, (__v8sf)__V2, (M)); }) 34955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 35055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Vector shuffle */ 
/* Two-source float shuffle.  a and b are copied into typed temporaries;
 * mask is expanded once per lane.  Using the four 2-bit fields of mask
 * (bits 1:0, 3:2, 5:4, 7:6):
 *   lanes 0,1 <- a[0..3]   (fields 0 and 1)
 *   lanes 2,3 <- b[0..3]   (fields 2 and 3, shuffle index + 8)
 *   lanes 4,5 <- a[4..7]   (fields 0 and 1, shuffle index + 4)
 *   lanes 6,7 <- b[4..7]   (fields 2 and 3, shuffle index + 12) */
#define _mm256_shuffle_ps(a, b, mask) __extension__ ({ \
        __m256 __a = (a); \
        __m256 __b = (b); \
        (__m256)__builtin_shufflevector((__v8sf)__a, (__v8sf)__b, \
        (mask) & 0x3,                ((mask) & 0xc) >> 2, \
        (((mask) & 0x30) >> 4) + 8,  (((mask) & 0xc0) >> 6) + 8, \
        ((mask) & 0x3) + 4,          (((mask) & 0xc) >> 2) + 4, \
        (((mask) & 0x30) >> 4) + 12, (((mask) & 0xc0) >> 6) + 12); })

/* Two-source double shuffle.  One bit of mask per lane:
 *   lane 0 <- a[0..1] (bit 0)      lane 1 <- b[0..1] (bit 1, index + 4)
 *   lane 2 <- a[2..3] (bit 2)      lane 3 <- b[2..3] (bit 3, index + 6) */
#define _mm256_shuffle_pd(a, b, mask) __extension__ ({ \
        __m256d __a = (a); \
        __m256d __b = (b); \
        (__m256d)__builtin_shufflevector((__v4df)__a, (__v4df)__b, \
        (mask) & 0x1, \
        (((mask) & 0x2) >> 1) + 4, \
        (((mask) & 0x4) >> 2) + 2, \
        (((mask) & 0x8) >> 3) + 6); })

/* Compare */
#define _CMP_EQ_OQ    0x00 /* Equal (ordered, non-signaling)  */
#define _CMP_LT_OS    0x01 /* Less-than (ordered, signaling)  */
#define _CMP_LE_OS    0x02 /* Less-than-or-equal (ordered, signaling)  */
#define _CMP_UNORD_Q  0x03 /* Unordered (non-signaling)  */
#define _CMP_NEQ_UQ   0x04 /* Not-equal (unordered, non-signaling)  */
#define _CMP_NLT_US   0x05 /* Not-less-than (unordered, signaling)  */
#define _CMP_NLE_US   0x06 /* Not-less-than-or-equal (unordered, signaling)  */
#define _CMP_ORD_Q    0x07 /* Ordered (non-signaling)  */
#define _CMP_EQ_UQ    0x08 /* Equal (unordered, non-signaling)  */
#define _CMP_NGE_US   0x09 /* Not-greater-than-or-equal (unordered, signaling)  */
#define _CMP_NGT_US   0x0a /* Not-greater-than (unordered, signaling)  */
#define _CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling)  */
#define _CMP_NEQ_OQ   0x0c /* Not-equal (ordered, non-signaling)  */
#define _CMP_GE_OS    0x0d /* Greater-than-or-equal (ordered, signaling)  */
#define _CMP_GT_OS    0x0e /* Greater-than (ordered, signaling)  */
#define _CMP_TRUE_UQ  0x0f /* True (unordered, non-signaling)  */
#define _CMP_EQ_OS    0x10 /* Equal (ordered, signaling)  */
#define _CMP_LT_OQ    0x11 /* Less-than (ordered, non-signaling)  */
#define _CMP_LE_OQ    0x12 /* Less-than-or-equal (ordered, non-signaling)  */
#define _CMP_UNORD_S  0x13 /* Unordered (signaling)  */
#define _CMP_NEQ_US   0x14 /* Not-equal (unordered, signaling)  */
#define _CMP_NLT_UQ   0x15 /* Not-less-than (unordered, non-signaling)  */
#define _CMP_NLE_UQ   0x16 /* Not-less-than-or-equal (unordered, non-signaling)  */
#define _CMP_ORD_S    0x17 /* Ordered (signaling)  */
#define _CMP_EQ_US    0x18 /* Equal (unordered, signaling)  */
#define _CMP_NGE_UQ   0x19 /* Not-greater-than-or-equal (unordered, non-signaling)  */
#define _CMP_NGT_UQ   0x1a /* Not-greater-than (unordered, non-signaling)  */
#define _CMP_FALSE_OS 0x1b /* False (ordered, signaling)  */
#define _CMP_NEQ_OS   0x1c /* Not-equal (ordered, signaling)  */
#define _CMP_GE_OQ    0x1d /* Greater-than-or-equal (ordered, non-signaling)  */
#define _CMP_GT_OQ    0x1e /* Greater-than (ordered, non-signaling)  */
#define _CMP_TRUE_US  0x1f /* True (unordered, signaling)  */

/*
 * Comparison macros.  Each one forwards its two vector operands and the
 * predicate `c` (one of the _CMP_* constants) to the matching
 * __builtin_ia32_cmp* builtin and yields the builtin's result cast to the
 * operand vector type.
 *
 * The operands are cast in place rather than copied into block-scope
 * temporaries: temporaries named __a / __b would shadow identically named
 * variables appearing in the argument expressions and turn the copy into a
 * self-initialization.
 */

/* Compare 128-bit vectors of double using predicate c. */
#define _mm_cmp_pd(a, b, c) __extension__ ({ \
  (__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \
                                (__v2df)(__m128d)(b), (c)); })

/* Compare 128-bit vectors of float using predicate c. */
#define _mm_cmp_ps(a, b, c) __extension__ ({ \
  (__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \
                               (__v4sf)(__m128)(b), (c)); })

/* Compare 256-bit vectors of double using predicate c. */
#define _mm256_cmp_pd(a, b, c) __extension__ ({ \
  (__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \
                                   (__v4df)(__m256d)(b), (c)); })

/* Compare 256-bit vectors of float using predicate c. */
#define _mm256_cmp_ps(a, b, c) __extension__ ({ \
  (__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \
                                  (__v8sf)(__m256)(b), (c)); })

/* Scalar-double form: forwards to __builtin_ia32_cmpsd. */
#define _mm_cmp_sd(a, b, c) __extension__ ({ \
  (__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \
                                (__v2df)(__m128d)(b), (c)); })
42732bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson 42832bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson#define _mm_cmp_ss(a, b, c) __extension__ ({ \ 42932bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson __m128 __a = (a); \ 43032bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson __m128 __b = (b); \ 43132bae37b821e6ade738849ac14e3d3de06afb0beBob Wilson (__m128)__builtin_ia32_cmpss((__v4sf)__a, (__v4sf)__b, (c)); }) 43255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 43355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Vector extract */ 4341e4faf56cd310dbd89b7d192db57c3d120bec8a2Chad Rosier#define _mm256_extractf128_pd(A, O) __extension__ ({ \ 4351e4faf56cd310dbd89b7d192db57c3d120bec8a2Chad Rosier __m256d __A = (A); \ 43634a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper (__m128d)__builtin_ia32_vextractf128_pd256((__v4df)__A, (O)); }) 43755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 4381e4faf56cd310dbd89b7d192db57c3d120bec8a2Chad Rosier#define _mm256_extractf128_ps(A, O) __extension__ ({ \ 4391e4faf56cd310dbd89b7d192db57c3d120bec8a2Chad Rosier __m256 __A = (A); \ 44034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper (__m128)__builtin_ia32_vextractf128_ps256((__v8sf)__A, (O)); }) 44155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 4421e4faf56cd310dbd89b7d192db57c3d120bec8a2Chad Rosier#define _mm256_extractf128_si256(A, O) __extension__ ({ \ 4431e4faf56cd310dbd89b7d192db57c3d120bec8a2Chad Rosier __m256i __A = (A); \ 44434a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper (__m128i)__builtin_ia32_vextractf128_si256((__v8si)__A, (O)); }) 44555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 44655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 4474f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_extract_epi32(__m256i __a, int const __imm) 44855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 
4494f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v8si __b = (__v8si)__a; 4508484375b0fc442c704b3ec06e2e795d88591606fManman Ren return __b[__imm & 7]; 45155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 45255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 45355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 4544f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_extract_epi16(__m256i __a, int const __imm) 45555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 4564f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v16hi __b = (__v16hi)__a; 4578484375b0fc442c704b3ec06e2e795d88591606fManman Ren return __b[__imm & 15]; 45855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 45955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 46055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 4614f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_extract_epi8(__m256i __a, int const __imm) 46255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 4634f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v32qi __b = (__v32qi)__a; 4648484375b0fc442c704b3ec06e2e795d88591606fManman Ren return __b[__imm & 31]; 46555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 46655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 46755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#ifdef __x86_64__ 46855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline long long __attribute__((__always_inline__, __nodebug__)) 4694f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_extract_epi64(__m256i __a, const int __imm) 47055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 4714f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v4di __b = (__v4di)__a; 4728484375b0fc442c704b3ec06e2e795d88591606fManman Ren return 
__b[__imm & 3]; 47355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 47455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#endif 47555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 47655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Vector insert */ 477b95ddf15e75a6ea27f10c410bbc7a82308b19f4bChad Rosier#define _mm256_insertf128_pd(V1, V2, O) __extension__ ({ \ 478b95ddf15e75a6ea27f10c410bbc7a82308b19f4bChad Rosier __m256d __V1 = (V1); \ 479b95ddf15e75a6ea27f10c410bbc7a82308b19f4bChad Rosier __m128d __V2 = (V2); \ 48034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper (__m256d)__builtin_ia32_vinsertf128_pd256((__v4df)__V1, (__v2df)__V2, (O)); }) 48155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 482b95ddf15e75a6ea27f10c410bbc7a82308b19f4bChad Rosier#define _mm256_insertf128_ps(V1, V2, O) __extension__ ({ \ 483b95ddf15e75a6ea27f10c410bbc7a82308b19f4bChad Rosier __m256 __V1 = (V1); \ 484b95ddf15e75a6ea27f10c410bbc7a82308b19f4bChad Rosier __m128 __V2 = (V2); \ 48534a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper (__m256)__builtin_ia32_vinsertf128_ps256((__v8sf)__V1, (__v4sf)__V2, (O)); }) 48655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 487b95ddf15e75a6ea27f10c410bbc7a82308b19f4bChad Rosier#define _mm256_insertf128_si256(V1, V2, O) __extension__ ({ \ 488b95ddf15e75a6ea27f10c410bbc7a82308b19f4bChad Rosier __m256i __V1 = (V1); \ 489b95ddf15e75a6ea27f10c410bbc7a82308b19f4bChad Rosier __m128i __V2 = (V2); \ 49034a1da4354959522cd1721ce9ca099cc5c743f01Craig Topper (__m256i)__builtin_ia32_vinsertf128_si256((__v8si)__V1, (__v4si)__V2, (O)); }) 49155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 49255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 4934f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_insert_epi32(__m256i __a, int __b, int const __imm) 49455db5b874416cde3f2601a717e25d0974bf02f80Bruno 
Cardoso Lopes{ 4954f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v8si __c = (__v8si)__a; 4964f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __c[__imm & 7] = __b; 4974f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__c; 49855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 49955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 50055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 5014f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_insert_epi16(__m256i __a, int __b, int const __imm) 50255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5034f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v16hi __c = (__v16hi)__a; 5044f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __c[__imm & 15] = __b; 5054f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__c; 50655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 50755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 50855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 5094f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_insert_epi8(__m256i __a, int __b, int const __imm) 51055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5114f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v32qi __c = (__v32qi)__a; 5124f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __c[__imm & 31] = __b; 5134f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__c; 51455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 51555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 51655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#ifdef __x86_64__ 51755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 
5184f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_insert_epi64(__m256i __a, int __b, int const __imm) 51955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5204f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v4di __c = (__v4di)__a; 5214f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __c[__imm & 3] = __b; 5224f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__c; 52355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 52455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes#endif 52555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 52655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Conversion */ 52755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 5284f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cvtepi32_pd(__m128i __a) 52955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5304f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)__builtin_ia32_cvtdq2pd256((__v4si) __a); 53155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 53255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 53355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 5344f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cvtepi32_ps(__m256i __a) 53555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5364f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_cvtdq2ps256((__v8si) __a); 53755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 53855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 53955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m128 __attribute__((__always_inline__, __nodebug__)) 5404f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cvtpd_ps(__m256d __a) 
54155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5424f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a); 54355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 54455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 54555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 5464f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cvtps_epi32(__m256 __a) 54755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5484f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_cvtps2dq256((__v8sf) __a); 54955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 55055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 55155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 5524f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cvtps_pd(__m128 __a) 55355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5544f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)__builtin_ia32_cvtps2pd256((__v4sf) __a); 55555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 55655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 55755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m128i __attribute__((__always_inline__, __nodebug__)) 5584f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cvttpd_epi32(__m256d __a) 55955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5604f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a); 56155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 56255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 56355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m128i 
__attribute__((__always_inline__, __nodebug__)) 5644f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cvtpd_epi32(__m256d __a) 56555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5664f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a); 56755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 56855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 56955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 5704f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_cvttps_epi32(__m256 __a) 57155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5724f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a); 57355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 57455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 57555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Vector replicate */ 57655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 5774f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_movehdup_ps(__m256 __a) 57855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5794f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7); 58055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 58155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 58255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 5834f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_moveldup_ps(__m256 __a) 58455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5854f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_shufflevector(__a, __a, 0, 0, 2, 2, 4, 4, 6, 6); 
58655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 58755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 58855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 5894f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_movedup_pd(__m256d __a) 59055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5914f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_shufflevector(__a, __a, 0, 0, 2, 2); 59255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 59355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 59455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Unpack and Interleave */ 59555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 5964f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_unpackhi_pd(__m256d __a, __m256d __b) 59755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 5984f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_shufflevector(__a, __b, 1, 5, 1+2, 5+2); 59955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 60055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 60155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 6024f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_unpacklo_pd(__m256d __a, __m256d __b) 60355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6044f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_shufflevector(__a, __b, 0, 4, 0+2, 4+2); 60555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 60655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 60755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 
6084f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_unpackhi_ps(__m256 __a, __m256 __b) 60955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6104f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_shufflevector(__a, __b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1); 61155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 61255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 61355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 6144f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_unpacklo_ps(__m256 __a, __m256 __b) 61555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6164f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_shufflevector(__a, __b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1); 61755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 61855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 61955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Bit Test */ 62055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6214f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_testz_pd(__m128d __a, __m128d __b) 62255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6234f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b); 62455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 62555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 62655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6274f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_testc_pd(__m128d __a, __m128d __b) 62855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6294f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b); 
63055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 63155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 63255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6334f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_testnzc_pd(__m128d __a, __m128d __b) 63455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6354f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b); 63655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 63755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 63855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6394f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_testz_ps(__m128 __a, __m128 __b) 64055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6414f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b); 64255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 64355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 64455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6454f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_testc_ps(__m128 __a, __m128 __b) 64655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6474f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b); 64855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 64955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 65055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6514f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_testnzc_ps(__m128 __a, __m128 __b) 65255db5b874416cde3f2601a717e25d0974bf02f80Bruno 
Cardoso Lopes{ 6534f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b); 65455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 65555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 65655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6574f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_testz_pd(__m256d __a, __m256d __b) 65855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6594f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b); 66055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 66155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 66255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6634f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_testc_pd(__m256d __a, __m256d __b) 66455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6654f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b); 66655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 66755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 66855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6694f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_testnzc_pd(__m256d __a, __m256d __b) 67055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6714f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b); 67255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 67355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 67455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, 
__nodebug__)) 6754f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_testz_ps(__m256 __a, __m256 __b) 67655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6774f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b); 67855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 67955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 68055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6814f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_testc_ps(__m256 __a, __m256 __b) 68255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6834f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b); 68455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 68555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 68655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6874f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_testnzc_ps(__m256 __a, __m256 __b) 68855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6894f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b); 69055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 69155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 69255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6934f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_testz_si256(__m256i __a, __m256i __b) 69455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 6954f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b); 69655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 
69755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 69855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 6994f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_testc_si256(__m256i __a, __m256i __b) 70055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 7014f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b); 70255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 70355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 70455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 7054f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_testnzc_si256(__m256i __a, __m256i __b) 70655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 7074f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b); 70855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 70955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 71055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Vector extract sign mask */ 71155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 7124f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_movemask_pd(__m256d __a) 71355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 7144f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_movmskpd256((__v4df)__a); 71555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 71655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 71755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline int __attribute__((__always_inline__, __nodebug__)) 7184f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_movemask_ps(__m256 __a) 
71955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 7204f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return __builtin_ia32_movmskps256((__v8sf)__a); 72155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 72255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 7234f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie/* Vector __zero */ 72455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 72555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes_mm256_zeroall(void) 72655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 72755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes __builtin_ia32_vzeroall(); 72855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 72955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 73055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 73155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes_mm256_zeroupper(void) 73255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 73355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes __builtin_ia32_vzeroupper(); 73455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 73555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 73655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Vector load with broadcast */ 73755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m128 __attribute__((__always_inline__, __nodebug__)) 7384f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_broadcast_ss(float const *__a) 73955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 740ef8225444452a1486bd721f3285301fe84643b00Stephen Hines float __f = *__a; 741ef8225444452a1486bd721f3285301fe84643b00Stephen Hines return (__m128)(__v4sf){ __f, __f, __f, __f }; 74255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 
74355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 74455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 7454f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_broadcast_sd(double const *__a) 74655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 747ef8225444452a1486bd721f3285301fe84643b00Stephen Hines double __d = *__a; 748ef8225444452a1486bd721f3285301fe84643b00Stephen Hines return (__m256d)(__v4df){ __d, __d, __d, __d }; 74955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 75055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 75155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 7524f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_broadcast_ss(float const *__a) 75355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 754ef8225444452a1486bd721f3285301fe84643b00Stephen Hines float __f = *__a; 755ef8225444452a1486bd721f3285301fe84643b00Stephen Hines return (__m256)(__v8sf){ __f, __f, __f, __f, __f, __f, __f, __f }; 75655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 75755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 75855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 7594f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_broadcast_pd(__m128d const *__a) 76055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 7614f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)__builtin_ia32_vbroadcastf128_pd256(__a); 76255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 76355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 76455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 7654f918aed75d4927e88365541c7200f0b5fe5014bDavid 
Blaikie_mm256_broadcast_ps(__m128 const *__a) 76655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 7674f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_vbroadcastf128_ps256(__a); 76855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 76955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 77055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* SIMD load ops */ 77155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 7724f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_load_pd(double const *__p) 77355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 7744f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return *(__m256d *)__p; 77555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 77655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 77755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 7784f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_load_ps(float const *__p) 77955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 7804f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return *(__m256 *)__p; 78155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 78255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 78355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 7844f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_loadu_pd(double const *__p) 78555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 7862ee2ac2293f313dfe1c6eb7034527a92b5d23158Craig Topper struct __loadu_pd { 7874f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __m256d __v; 7882ee2ac2293f313dfe1c6eb7034527a92b5d23158Craig Topper } __attribute__((packed, may_alias)); 7894f918aed75d4927e88365541c7200f0b5fe5014bDavid 
Blaikie return ((struct __loadu_pd*)__p)->__v; 79055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 79155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 79255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 7934f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_loadu_ps(float const *__p) 79455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 7952ee2ac2293f313dfe1c6eb7034527a92b5d23158Craig Topper struct __loadu_ps { 7964f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __m256 __v; 7972ee2ac2293f313dfe1c6eb7034527a92b5d23158Craig Topper } __attribute__((packed, may_alias)); 7984f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return ((struct __loadu_ps*)__p)->__v; 79955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 80055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 80155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 8024f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_load_si256(__m256i const *__p) 80355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8044f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return *__p; 80555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 80655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 80755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 8084f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_loadu_si256(__m256i const *__p) 80955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8102ee2ac2293f313dfe1c6eb7034527a92b5d23158Craig Topper struct __loadu_si256 { 8114f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __m256i __v; 8122ee2ac2293f313dfe1c6eb7034527a92b5d23158Craig Topper } __attribute__((packed, may_alias)); 8134f918aed75d4927e88365541c7200f0b5fe5014bDavid 
Blaikie return ((struct __loadu_si256*)__p)->__v; 81455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 81555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 81655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 8174f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_lddqu_si256(__m256i const *__p) 81855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8194f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)__builtin_ia32_lddqu256((char const *)__p); 82055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 82155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 82255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* SIMD store ops */ 82355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 8244f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_store_pd(double *__p, __m256d __a) 82555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8264f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie *(__m256d *)__p = __a; 82755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 82855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 82955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 8304f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_store_ps(float *__p, __m256 __a) 83155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8324f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie *(__m256 *)__p = __a; 83355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 83455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 83555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 8364f918aed75d4927e88365541c7200f0b5fe5014bDavid 
Blaikie_mm256_storeu_pd(double *__p, __m256d __a) 83755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8384f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_storeupd256(__p, (__v4df)__a); 83955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 84055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 84155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 8424f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_storeu_ps(float *__p, __m256 __a) 84355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8444f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_storeups256(__p, (__v8sf)__a); 84555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 84655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 84755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 8484f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_store_si256(__m256i *__p, __m256i __a) 84955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8504f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie *__p = __a; 85155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 85255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 85355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 8544f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_storeu_si256(__m256i *__p, __m256i __a) 85555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8564f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_storedqu256((char *)__p, (__v32qi)__a); 85755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 85855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 85955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Conditional load ops */ 
86055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m128d __attribute__((__always_inline__, __nodebug__)) 8614f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_maskload_pd(double const *__p, __m128d __m) 86255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8634f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2df)__m); 86455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 86555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 86655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 8674f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_maskload_pd(double const *__p, __m256d __m) 86855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8694f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d)__builtin_ia32_maskloadpd256((const __v4df *)__p, 8704f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie (__v4df)__m); 87155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 87255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 87355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m128 __attribute__((__always_inline__, __nodebug__)) 8744f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_maskload_ps(float const *__p, __m128 __m) 87555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8764f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4sf)__m); 87755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 87855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 87955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 8804f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_maskload_ps(float const *__p, __m256 __m) 
88155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8824f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256)__builtin_ia32_maskloadps256((const __v8sf *)__p, (__v8sf)__m); 88355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 88455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 88555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Conditional store ops */ 88655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 8874f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_maskstore_ps(float *__p, __m256 __m, __m256 __a) 88855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8894f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8sf)__m, (__v8sf)__a); 89055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 89155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 89255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 8934f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_maskstore_pd(double *__p, __m128d __m, __m128d __a) 89455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 8954f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_maskstorepd((__v2df *)__p, (__v2df)__m, (__v2df)__a); 89655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 89755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 89855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 8994f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_maskstore_pd(double *__p, __m256d __m, __m256d __a) 90055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 9014f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4df)__m, (__v4df)__a); 
90255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 90355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 90455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 9054f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm_maskstore_ps(float *__p, __m128 __m, __m128 __a) 90655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 9074f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4sf)__m, (__v4sf)__a); 90855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 90955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 91055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Cacheability support ops */ 91155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 9124f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_stream_si256(__m256i *__a, __m256i __b) 91355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 9144f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_movntdq256((__v4di *)__a, (__v4di)__b); 91555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 91655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 91755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 9184f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_stream_pd(double *__a, __m256d __b) 91955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 9204f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_movntpd256(__a, (__v4df)__b); 92155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 92255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 92355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline void __attribute__((__always_inline__, __nodebug__)) 
9244f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_stream_ps(float *__p, __m256 __a) 92555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 9264f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_movntps256(__p, (__v8sf)__a); 92755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 92855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 92955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Create vectors */ 93055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 9314f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_set_pd(double __a, double __b, double __c, double __d) 93255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 9334f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d){ __d, __c, __b, __a }; 93455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 93555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 93655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 9374f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_set_ps(float __a, float __b, float __c, float __d, 9384f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie float __e, float __f, float __g, float __h) 93955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 9404f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256){ __h, __g, __f, __e, __d, __c, __b, __a }; 94155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 94255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 94355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 9444f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_set_epi32(int __i0, int __i1, int __i2, int __i3, 9454f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie int __i4, int __i5, int 
__i6, int __i7) 94655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 9474f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)(__v8si){ __i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0 }; 94855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 94955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 95055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 9514f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_set_epi16(short __w15, short __w14, short __w13, short __w12, 9524f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie short __w11, short __w10, short __w09, short __w08, 9534f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie short __w07, short __w06, short __w05, short __w04, 9544f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie short __w03, short __w02, short __w01, short __w00) 95555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 9564f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)(__v16hi){ __w00, __w01, __w02, __w03, __w04, __w05, __w06, 9574f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15 }; 95855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 95955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 96055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 9614f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_set_epi8(char __b31, char __b30, char __b29, char __b28, 9624f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char __b27, char __b26, char __b25, char __b24, 9634f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char __b23, char __b22, char __b21, char __b20, 9644f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char __b19, char __b18, char __b17, char __b16, 9654f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie 
char __b15, char __b14, char __b13, char __b12, 9664f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char __b11, char __b10, char __b09, char __b08, 9674f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char __b07, char __b06, char __b05, char __b04, 9684f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char __b03, char __b02, char __b01, char __b00) 96955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 97055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes return (__m256i)(__v32qi){ 9714f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07, 9724f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15, 9734f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23, 9744f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31 97555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes }; 97655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 97755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 97855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 9794f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d) 98055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 9814f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)(__v4di){ __d, __c, __b, __a }; 98255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 98355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 98455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Create vectors with elements in reverse order */ 98555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 
9864f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_setr_pd(double __a, double __b, double __c, double __d) 98755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 9884f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d){ __a, __b, __c, __d }; 98955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 99055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 99155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 9924f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_setr_ps(float __a, float __b, float __c, float __d, 9934f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie float __e, float __f, float __g, float __h) 99455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 9954f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256){ __a, __b, __c, __d, __e, __f, __g, __h }; 99655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 99755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 99855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 9994f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3, 10004f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie int __i4, int __i5, int __i6, int __i7) 100155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 10024f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)(__v8si){ __i0, __i1, __i2, __i3, __i4, __i5, __i6, __i7 }; 100355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 100455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 100555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 10064f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12, 
10074f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie short __w11, short __w10, short __w09, short __w08, 10084f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie short __w07, short __w06, short __w05, short __w04, 10094f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie short __w03, short __w02, short __w01, short __w00) 101055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 10114f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)(__v16hi){ __w15, __w14, __w13, __w12, __w11, __w10, __w09, 10124f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __w08, __w07, __w06, __w05, __w04, __w03, __w02, __w01, __w00 }; 101355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 101455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 101555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 10164f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28, 10174f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char __b27, char __b26, char __b25, char __b24, 10184f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char __b23, char __b22, char __b21, char __b20, 10194f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char __b19, char __b18, char __b17, char __b16, 10204f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char __b15, char __b14, char __b13, char __b12, 10214f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char __b11, char __b10, char __b09, char __b08, 10224f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char __b07, char __b06, char __b05, char __b04, 10234f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie char __b03, char __b02, char __b01, char __b00) 102455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 102555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes return (__m256i)(__v32qi){ 10264f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie 
__b31, __b30, __b29, __b28, __b27, __b26, __b25, __b24, 10274f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __b23, __b22, __b21, __b20, __b19, __b18, __b17, __b16, 10284f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __b15, __b14, __b13, __b12, __b11, __b10, __b09, __b08, 10294f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __b07, __b06, __b05, __b04, __b03, __b02, __b01, __b00 }; 103055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 103155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 103255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 10334f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d) 103455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 10354f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)(__v4di){ __a, __b, __c, __d }; 103655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 103755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 103855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Create vectors with repeated elements */ 103955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 10404f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_set1_pd(double __w) 104155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 10424f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256d){ __w, __w, __w, __w }; 104355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 104455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 104555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 10464f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_set1_ps(float __w) 104755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 
10484f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256){ __w, __w, __w, __w, __w, __w, __w, __w }; 104955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 105055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 105155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 10524f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_set1_epi32(int __i) 105355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 10544f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)(__v8si){ __i, __i, __i, __i, __i, __i, __i, __i }; 105555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 105655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 105755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 10584f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_set1_epi16(short __w) 105955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 10604f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)(__v16hi){ __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, 10614f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __w, __w, __w, __w, __w, __w }; 106255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 106355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 106455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 10654f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_set1_epi8(char __b) 106655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 10674f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)(__v32qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, 10684f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, 
10694f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __b, __b, __b, __b, __b, __b, __b }; 107055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 107155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 107255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 10734f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_set1_epi64x(long long __q) 107455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 10754f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return (__m256i)(__v4di){ __q, __q, __q, __q }; 107655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 107755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 10784f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie/* Create __zeroed vectors */ 107955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 108055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes_mm256_setzero_pd(void) 108155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 108255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes return (__m256d){ 0, 0, 0, 0 }; 108355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 108455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 108555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 108655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes_mm256_setzero_ps(void) 108755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 108855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes return (__m256){ 0, 0, 0, 0, 0, 0, 0, 0 }; 108955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 109055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 109155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, 
__nodebug__)) 109255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes_mm256_setzero_si256(void) 109355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 109455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes return (__m256i){ 0LL, 0LL, 0LL, 0LL }; 109555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 109655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 109755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes/* Cast between vector types */ 109855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 1099f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner_mm256_castpd_ps(__m256d __a) 110055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1101f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner return (__m256)__a; 110255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 110355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 110455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 1105f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner_mm256_castpd_si256(__m256d __a) 110655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1107f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner return (__m256i)__a; 110855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 110955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 111055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 1111f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner_mm256_castps_pd(__m256 __a) 111255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1113f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner return (__m256d)__a; 111455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 111555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 
111655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 1117f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner_mm256_castps_si256(__m256 __a) 111855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1119f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner return (__m256i)__a; 112055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 112155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 112255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 1123f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner_mm256_castsi256_ps(__m256i __a) 112455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1125f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner return (__m256)__a; 112655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 112755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 112855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 1129f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner_mm256_castsi256_pd(__m256i __a) 113055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1131f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner return (__m256d)__a; 113255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 113355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 113455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m128d __attribute__((__always_inline__, __nodebug__)) 1135f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner_mm256_castpd256_pd128(__m256d __a) 113655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1137f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner return __builtin_shufflevector(__a, __a, 0, 1); 113855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 
113955db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 114055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m128 __attribute__((__always_inline__, __nodebug__)) 1141f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner_mm256_castps256_ps128(__m256 __a) 114255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1143f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); 114455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 114555db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 114655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m128i __attribute__((__always_inline__, __nodebug__)) 1147f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner_mm256_castsi256_si128(__m256i __a) 114855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1149f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner return __builtin_shufflevector(__a, __a, 0, 1); 115055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 115155db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 115255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 1153f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner_mm256_castpd128_pd256(__m128d __a) 115455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1155d9d57e99793df47a8fc86cde9ecd656411f02777Craig Topper return __builtin_shufflevector(__a, __a, 0, 1, -1, -1); 115655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 115755db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 115855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 1159f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner_mm256_castps128_ps256(__m128 __a) 116055db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1161d9d57e99793df47a8fc86cde9ecd656411f02777Craig 
Topper return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1); 116255db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 116355db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes 116455db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopesstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 1165f0cdc84298103e57919674bd1781624c74ab76d3Reid Kleckner_mm256_castsi128_si256(__m128i __a) 116655db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes{ 1167d9d57e99793df47a8fc86cde9ecd656411f02777Craig Topper return __builtin_shufflevector(__a, __a, 0, 1, -1, -1); 116855db5b874416cde3f2601a717e25d0974bf02f80Bruno Cardoso Lopes} 1169db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier 1170db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier/* SIMD load ops (unaligned) */ 1171db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosierstatic __inline __m256 __attribute__((__always_inline__, __nodebug__)) 11724f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo) 1173db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier{ 1174db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier struct __loadu_ps { 11754f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __m128 __v; 1176db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier } __attribute__((__packed__, __may_alias__)); 1177db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier 11784f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __m256 __v256 = _mm256_castps128_ps256(((struct __loadu_ps*)__addr_lo)->__v); 11794f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return _mm256_insertf128_ps(__v256, ((struct __loadu_ps*)__addr_hi)->__v, 1); 1180db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier} 1181db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier 1182db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosierstatic __inline __m256d __attribute__((__always_inline__, __nodebug__)) 11834f918aed75d4927e88365541c7200f0b5fe5014bDavid 
Blaikie_mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo) 1184db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier{ 1185db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier struct __loadu_pd { 11864f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __m128d __v; 1187db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier } __attribute__((__packed__, __may_alias__)); 1188db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier 11894f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __m256d __v256 = _mm256_castpd128_pd256(((struct __loadu_pd*)__addr_lo)->__v); 11904f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return _mm256_insertf128_pd(__v256, ((struct __loadu_pd*)__addr_hi)->__v, 1); 1191db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier} 1192db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier 1193db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosierstatic __inline __m256i __attribute__((__always_inline__, __nodebug__)) 11944f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo) 1195db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier{ 1196db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier struct __loadu_si128 { 11974f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __m128i __v; 1198db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier } __attribute__((packed, may_alias)); 11994f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __m256i __v256 = _mm256_castsi128_si256( 12004f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie ((struct __loadu_si128*)__addr_lo)->__v); 12014f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie return _mm256_insertf128_si256(__v256, 12024f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie ((struct __loadu_si128*)__addr_hi)->__v, 1); 1203db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier} 1204db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier 1205db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier/* SIMD store ops (unaligned) */ 
1206db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosierstatic __inline void __attribute__((__always_inline__, __nodebug__)) 12074f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a) 1208db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier{ 12094f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __m128 __v128; 1210db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier 12114f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v128 = _mm256_castps256_ps128(__a); 12124f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_storeups(__addr_lo, __v128); 12134f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v128 = _mm256_extractf128_ps(__a, 1); 12144f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_storeups(__addr_hi, __v128); 1215db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier} 1216db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier 1217db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosierstatic __inline void __attribute__((__always_inline__, __nodebug__)) 12184f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a) 1219db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier{ 12204f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __m128d __v128; 1221db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier 12224f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v128 = _mm256_castpd256_pd128(__a); 12234f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_storeupd(__addr_lo, __v128); 12244f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v128 = _mm256_extractf128_pd(__a, 1); 12254f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_storeupd(__addr_hi, __v128); 1226db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier} 1227db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier 1228db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosierstatic __inline void 
__attribute__((__always_inline__, __nodebug__)) 12294f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie_mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a) 1230db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier{ 12314f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __m128i __v128; 1232db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier 12334f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v128 = _mm256_castsi256_si128(__a); 12344f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_storedqu((char *)__addr_lo, (__v16qi)__v128); 12354f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __v128 = _mm256_extractf128_si256(__a, 1); 12364f918aed75d4927e88365541c7200f0b5fe5014bDavid Blaikie __builtin_ia32_storedqu((char *)__addr_hi, (__v16qi)__v128); 1237db163c87f990653b59fcc5f6e4864b652f4a49bdChad Rosier} 12387cb4fae8da8f541f43d39896c989b06c69fd7821Richard Smith 12397cb4fae8da8f541f43d39896c989b06c69fd7821Richard Smith#endif /* __AVXINTRIN_H */ 1240