1a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang/*===---- xmmintrin.h - SSE intrinsics -------------------------------------=== 2a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang * 3a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang * Permission is hereby granted, free of charge, to any person obtaining a copy 4a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang * of this software and associated documentation files (the "Software"), to deal 5a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang * in the Software without restriction, including without limitation the rights 6a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang * copies of the Software, and to permit persons to whom the Software is 8a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang * furnished to do so, subject to the following conditions: 9a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang * 10a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang * The above copyright notice and this permission notice shall be included in 11a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang * all copies or substantial portions of the Software. 12a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang * 13a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang * THE SOFTWARE. 20a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang * 21a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang *===-----------------------------------------------------------------------=== 22a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang */ 23a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 24a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#ifndef __XMMINTRIN_H 25a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define __XMMINTRIN_H 26a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 27a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#ifndef __SSE__ 28a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#error "SSE instruction set not enabled" 29a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#else 30a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 31a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#include <mmintrin.h> 32a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 33a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangtypedef int __v4si __attribute__((__vector_size__(16))); 34a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangtypedef float __v4sf __attribute__((__vector_size__(16))); 35a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangtypedef float __m128 __attribute__((__vector_size__(16))); 36a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 37990d2fc6a5ba9884654aa2d2bc5eda709ff17ba1Stephen Hines/* This header should only be included in a hosted environment as it depends on 38990d2fc6a5ba9884654aa2d2bc5eda709ff17ba1Stephen Hines * a standard library to provide allocation routines. */ 39a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#if __STDC_HOSTED__ 40a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#include <mm_malloc.h> 41a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#endif 42a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 43a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 44c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_add_ss(__m128 __a, __m128 __b) 45a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 46c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __a[0] += __b[0]; 47c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __a; 48a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 49a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 50a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 51c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_add_ps(__m128 __a, __m128 __b) 52a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 53c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __a + __b; 54a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 55a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 56a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 57c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_sub_ss(__m128 __a, __m128 __b) 58a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 59c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __a[0] -= __b[0]; 60c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __a; 61a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 62a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 63a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 64c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_sub_ps(__m128 __a, __m128 __b) 65a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 66c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __a - __b; 67a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 68a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 69a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 70c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_mul_ss(__m128 __a, __m128 __b) 71a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 72c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __a[0] *= __b[0]; 73c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __a; 74a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 75a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 76a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 77c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_mul_ps(__m128 __a, __m128 __b) 78a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 79c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __a * __b; 80a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 81a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 82a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 83c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_div_ss(__m128 __a, __m128 __b) 84a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 85c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __a[0] /= __b[0]; 86c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __a; 87a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 88a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 89a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 90c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_div_ps(__m128 __a, __m128 __b) 91a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 92c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __a / __b; 93a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 94a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 95a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 96c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_sqrt_ss(__m128 __a) 97a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 98c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __m128 __c = __builtin_ia32_sqrtss(__a); 99c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return (__m128) { __c[0], __a[1], __a[2], __a[3] }; 100a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 101a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 102a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 103c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_sqrt_ps(__m128 __a) 104a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 105c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_ia32_sqrtps(__a); 106a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 107a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 108a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 109c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_rcp_ss(__m128 __a) 110a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 111c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __m128 __c = __builtin_ia32_rcpss(__a); 112c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return (__m128) { __c[0], __a[1], __a[2], __a[3] }; 113a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 114a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 115a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 116c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_rcp_ps(__m128 __a) 117a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 118c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_ia32_rcpps(__a); 119a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 120a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 121a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 122c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_rsqrt_ss(__m128 __a) 123a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 124c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __m128 __c = __builtin_ia32_rsqrtss(__a); 125c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return (__m128) { __c[0], __a[1], __a[2], __a[3] }; 126a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 127a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 128a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 129c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_rsqrt_ps(__m128 __a) 130a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 131c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_ia32_rsqrtps(__a); 132a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 133a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 134a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 135c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_min_ss(__m128 __a, __m128 __b) 136a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 137c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_ia32_minss(__a, __b); 138a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 139a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 140a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 141c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_min_ps(__m128 __a, __m128 __b) 142a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 143c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_ia32_minps(__a, __b); 144a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 145a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 146a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 147c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_max_ss(__m128 __a, __m128 __b) 148a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 149c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_ia32_maxss(__a, __b); 150a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 151a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 152a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 153c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_max_ps(__m128 __a, __m128 __b) 154a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 155c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_ia32_maxps(__a, __b); 156a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 157a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 158a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 159c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_and_ps(__m128 __a, __m128 __b) 160a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 161c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return (__m128)((__v4si)__a & (__v4si)__b); 162a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 163a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 164a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 165c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_andnot_ps(__m128 __a, __m128 __b) 166a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 167c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return (__m128)(~(__v4si)__a & (__v4si)__b); 168a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 169a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 170a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 171c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_or_ps(__m128 __a, __m128 __b) 172a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 173c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return (__m128)((__v4si)__a | (__v4si)__b); 174a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 175a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 176a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 177c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_xor_ps(__m128 __a, __m128 __b) 178a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 179c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return (__m128)((__v4si)__a ^ (__v4si)__b); 180a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 181a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 182a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 183c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cmpeq_ss(__m128 __a, __m128 __b) 184a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 185b4d9c8b7ea2cc40fcbf52b30b067676ecd15312dStephen Hines return (__m128)__builtin_ia32_cmpeqss(__a, __b); 186a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 187a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 188a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 189c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cmpeq_ps(__m128 __a, __m128 __b) 190a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 191b4d9c8b7ea2cc40fcbf52b30b067676ecd15312dStephen Hines return (__m128)__builtin_ia32_cmpeqps(__a, __b); 192a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 193a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 194a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 195c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cmplt_ss(__m128 __a, __m128 __b) 196a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 197b4d9c8b7ea2cc40fcbf52b30b067676ecd15312dStephen Hines return (__m128)__builtin_ia32_cmpltss(__a, __b); 198a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 199a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 200a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 201c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cmplt_ps(__m128 __a, __m128 __b) 202a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 203b4d9c8b7ea2cc40fcbf52b30b067676ecd15312dStephen Hines return (__m128)__builtin_ia32_cmpltps(__a, __b); 204a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 205a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 206a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 207c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cmple_ss(__m128 __a, __m128 __b) 208a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 209b4d9c8b7ea2cc40fcbf52b30b067676ecd15312dStephen Hines return (__m128)__builtin_ia32_cmpless(__a, __b); 210a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 211a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 212a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 213c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cmple_ps(__m128 __a, __m128 __b) 214a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 215b4d9c8b7ea2cc40fcbf52b30b067676ecd15312dStephen Hines return (__m128)__builtin_ia32_cmpleps(__a, __b); 216a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 217a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 218a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 219c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cmpgt_ss(__m128 __a, __m128 __b) 220a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 221996e4dcc9c7aa280b1c129a44c0ccd00afc9bc8bStephen Hines return (__m128)__builtin_shufflevector(__a, 222b4d9c8b7ea2cc40fcbf52b30b067676ecd15312dStephen Hines __builtin_ia32_cmpltss(__b, __a), 223996e4dcc9c7aa280b1c129a44c0ccd00afc9bc8bStephen Hines 4, 1, 2, 3); 224a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 225a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 226a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 227c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cmpgt_ps(__m128 __a, __m128 __b) 228a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 229b4d9c8b7ea2cc40fcbf52b30b067676ecd15312dStephen Hines return (__m128)__builtin_ia32_cmpltps(__b, __a); 230a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 231a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 232a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 233c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cmpge_ss(__m128 __a, __m128 __b) 234a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 235996e4dcc9c7aa280b1c129a44c0ccd00afc9bc8bStephen Hines return (__m128)__builtin_shufflevector(__a, 236b4d9c8b7ea2cc40fcbf52b30b067676ecd15312dStephen Hines __builtin_ia32_cmpless(__b, __a), 237996e4dcc9c7aa280b1c129a44c0ccd00afc9bc8bStephen Hines 4, 1, 2, 3); 238a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 239a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 240a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 241c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cmpge_ps(__m128 __a, __m128 __b) 242a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 243b4d9c8b7ea2cc40fcbf52b30b067676ecd15312dStephen Hines return (__m128)__builtin_ia32_cmpleps(__b, __a); 244a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 245a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 246a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 247c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cmpneq_ss(__m128 __a, __m128 __b) 248a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 249b4d9c8b7ea2cc40fcbf52b30b067676ecd15312dStephen Hines return (__m128)__builtin_ia32_cmpneqss(__a, __b); 250a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 251a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 252a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 253c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cmpneq_ps(__m128 __a, __m128 __b) 254a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 255b4d9c8b7ea2cc40fcbf52b30b067676ecd15312dStephen Hines return (__m128)__builtin_ia32_cmpneqps(__a, __b); 256a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 257a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 258a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 259c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cmpnlt_ss(__m128 __a, __m128 __b) 260a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 261b4d9c8b7ea2cc40fcbf52b30b067676ecd15312dStephen Hines return (__m128)__builtin_ia32_cmpnltss(__a, __b); 262a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 263a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 264a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 265c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cmpnlt_ps(__m128 __a, __m128 __b) 266a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 267b4d9c8b7ea2cc40fcbf52b30b067676ecd15312dStephen Hines return (__m128)__builtin_ia32_cmpnltps(__a, __b); 268a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 269a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 270a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 271c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cmpnle_ss(__m128 __a, __m128 __b) 272a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 273b4d9c8b7ea2cc40fcbf52b30b067676ecd15312dStephen Hines return (__m128)__builtin_ia32_cmpnless(__a, __b); 274a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 275a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 276a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 277c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cmpnle_ps(__m128 __a, __m128 __b) 278a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 279b4d9c8b7ea2cc40fcbf52b30b067676ecd15312dStephen Hines return (__m128)__builtin_ia32_cmpnleps(__a, __b); 280a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 281a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 282a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 283c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cmpngt_ss(__m128 __a, __m128 __b) 284a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 285996e4dcc9c7aa280b1c129a44c0ccd00afc9bc8bStephen Hines return (__m128)__builtin_shufflevector(__a, 286b4d9c8b7ea2cc40fcbf52b30b067676ecd15312dStephen Hines __builtin_ia32_cmpnltss(__b, __a), 287996e4dcc9c7aa280b1c129a44c0ccd00afc9bc8bStephen Hines 4, 1, 2, 3); 288a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 289a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 290a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 291c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cmpngt_ps(__m128 __a, __m128 __b) 292a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 293b4d9c8b7ea2cc40fcbf52b30b067676ecd15312dStephen Hines return (__m128)__builtin_ia32_cmpnltps(__b, __a); 294a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 295a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 296a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 297c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cmpnge_ss(__m128 __a, __m128 __b) 298a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 299996e4dcc9c7aa280b1c129a44c0ccd00afc9bc8bStephen Hines return (__m128)__builtin_shufflevector(__a, 300b4d9c8b7ea2cc40fcbf52b30b067676ecd15312dStephen Hines __builtin_ia32_cmpnless(__b, __a), 301996e4dcc9c7aa280b1c129a44c0ccd00afc9bc8bStephen Hines 4, 1, 2, 3); 302a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 303a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 304a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 305c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cmpnge_ps(__m128 __a, __m128 __b) 306a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 307b4d9c8b7ea2cc40fcbf52b30b067676ecd15312dStephen Hines return (__m128)__builtin_ia32_cmpnleps(__b, __a); 308a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 309a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 310a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 311c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cmpord_ss(__m128 __a, __m128 __b) 312a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 313b4d9c8b7ea2cc40fcbf52b30b067676ecd15312dStephen Hines return (__m128)__builtin_ia32_cmpordss(__a, __b); 314a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 315a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 316a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 317c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cmpord_ps(__m128 __a, __m128 __b) 318a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 319b4d9c8b7ea2cc40fcbf52b30b067676ecd15312dStephen Hines return (__m128)__builtin_ia32_cmpordps(__a, __b); 320a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 321a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 322a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 323c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cmpunord_ss(__m128 __a, __m128 __b) 324a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 325b4d9c8b7ea2cc40fcbf52b30b067676ecd15312dStephen Hines return (__m128)__builtin_ia32_cmpunordss(__a, __b); 326a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 327a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 328a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 329c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cmpunord_ps(__m128 __a, __m128 __b) 330a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 331b4d9c8b7ea2cc40fcbf52b30b067676ecd15312dStephen Hines return (__m128)__builtin_ia32_cmpunordps(__a, __b); 332a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 333a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 334a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 335c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_comieq_ss(__m128 __a, __m128 __b) 336a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 337c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_ia32_comieq(__a, __b); 338a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 339a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 340a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 341c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_comilt_ss(__m128 __a, __m128 __b) 342a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 343c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_ia32_comilt(__a, __b); 344a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 345a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 346a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 347c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_comile_ss(__m128 __a, __m128 __b) 348a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 349c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_ia32_comile(__a, __b); 350a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 351a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 352a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 353c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_comigt_ss(__m128 __a, __m128 __b) 354a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 355c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_ia32_comigt(__a, __b); 356a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 357a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 358a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 359c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_comige_ss(__m128 __a, __m128 __b) 360a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 361c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_ia32_comige(__a, __b); 362a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 363a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 364a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 365c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_comineq_ss(__m128 __a, __m128 __b) 366a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 367c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_ia32_comineq(__a, __b); 368a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 369a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 370a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 371c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_ucomieq_ss(__m128 __a, __m128 __b) 372a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 373c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_ia32_ucomieq(__a, __b); 374a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 375a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 376a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 377c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_ucomilt_ss(__m128 __a, __m128 __b) 378a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 379c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_ia32_ucomilt(__a, __b); 380a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 381a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 382a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 383c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_ucomile_ss(__m128 __a, __m128 __b) 384a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 385c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_ia32_ucomile(__a, __b); 386a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 387a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 388a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 389c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_ucomigt_ss(__m128 __a, __m128 __b) 390a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 391c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_ia32_ucomigt(__a, __b); 392a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 393a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 394a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 395c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_ucomige_ss(__m128 __a, __m128 __b) 396a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 397c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_ia32_ucomige(__a, __b); 398a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 399a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 400a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 401c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_ucomineq_ss(__m128 __a, __m128 __b) 402a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 403c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_ia32_ucomineq(__a, __b); 404a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 405a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 406a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 407c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cvtss_si32(__m128 __a) 408a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 409c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_ia32_cvtss2si(__a); 410a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 411a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 412a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 413c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cvt_ss2si(__m128 __a) 414a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 415c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return _mm_cvtss_si32(__a); 416a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 417a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 418a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#ifdef __x86_64__ 419a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 420a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ long long __attribute__((__always_inline__, __nodebug__)) 421c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cvtss_si64(__m128 __a) 422a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 423c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_ia32_cvtss2si64(__a); 424a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 425a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 426a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#endif 427a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 428a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 429c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cvtps_pi32(__m128 __a) 430a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 431c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return (__m64)__builtin_ia32_cvtps2pi(__a); 432a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 433a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 434a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 435c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cvt_ps2pi(__m128 __a) 436a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 437c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return _mm_cvtps_pi32(__a); 438a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 439a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 440a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 441c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cvttss_si32(__m128 __a) 442a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 443c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __a[0]; 444a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 445a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 446a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 447c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cvtt_ss2si(__m128 __a) 448a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 449c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return _mm_cvttss_si32(__a); 450a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 451a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 452a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ long long __attribute__((__always_inline__, __nodebug__)) 453c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cvttss_si64(__m128 __a) 454a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 455c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __a[0]; 456a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 457a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 458a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 459c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cvttps_pi32(__m128 __a) 460a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 461c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return (__m64)__builtin_ia32_cvttps2pi(__a); 462a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 463a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 464a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 465c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cvtt_ps2pi(__m128 __a) 466a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 467c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return _mm_cvttps_pi32(__a); 468a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 469a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 470a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 471c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cvtsi32_ss(__m128 __a, int __b) 472a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 473c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __a[0] = __b; 474c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __a; 475a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 476a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 477a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 478c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cvt_si2ss(__m128 __a, int __b) 479a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 480c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return _mm_cvtsi32_ss(__a, __b); 481a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 482a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 483a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#ifdef __x86_64__ 484a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 485a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 486c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cvtsi64_ss(__m128 __a, long long __b) 487a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 488c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __a[0] = __b; 489c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __a; 490a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 491a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 492a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#endif 493a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 494a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 495c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cvtpi32_ps(__m128 __a, __m64 __b) 496a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 497c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_ia32_cvtpi2ps(__a, (__v2si)__b); 498a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 499a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 500a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 501c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cvt_pi2ps(__m128 __a, __m64 __b) 502a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 503c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return _mm_cvtpi32_ps(__a, __b); 504a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 505a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 506a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ float __attribute__((__always_inline__, __nodebug__)) 507c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cvtss_f32(__m128 __a) 508a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 509c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __a[0]; 510a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 511a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 512a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 513c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_loadh_pi(__m128 __a, const __m64 *__p) 514a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 515a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang typedef float __mm_loadh_pi_v2f32 __attribute__((__vector_size__(8))); 516a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang struct __mm_loadh_pi_struct { 517c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __mm_loadh_pi_v2f32 __u; 518a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang } __attribute__((__packed__, __may_alias__)); 519c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __mm_loadh_pi_v2f32 __b = ((struct __mm_loadh_pi_struct*)__p)->__u; 520c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1); 521c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5); 522a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 523a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 524a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 525c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_loadl_pi(__m128 __a, const __m64 *__p) 526a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 527a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang typedef float __mm_loadl_pi_v2f32 __attribute__((__vector_size__(8))); 528a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang struct __mm_loadl_pi_struct { 529c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __mm_loadl_pi_v2f32 __u; 530a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang } __attribute__((__packed__, __may_alias__)); 531c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __mm_loadl_pi_v2f32 __b = ((struct __mm_loadl_pi_struct*)__p)->__u; 532c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1); 533c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3); 534a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 535a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 536a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 537c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_load_ss(const float *__p) 538a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 539a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang struct __mm_load_ss_struct { 540c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines float __u; 541a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang } __attribute__((__packed__, __may_alias__)); 542c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines float __u = ((struct __mm_load_ss_struct*)__p)->__u; 543c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return (__m128){ __u, 0, 0, 0 }; 544a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 545a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 546a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 547c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_load1_ps(const float *__p) 548a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 549a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang struct __mm_load1_ps_struct { 550c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines float __u; 551a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang } __attribute__((__packed__, __may_alias__)); 552c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines float __u = ((struct __mm_load1_ps_struct*)__p)->__u; 553c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return (__m128){ __u, __u, __u, __u }; 554a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 555a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 556a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _mm_load_ps1(p) _mm_load1_ps(p) 557a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 558a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 559c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_load_ps(const float *__p) 560a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 561c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return *(__m128*)__p; 562a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 563a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 564a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 565c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_loadu_ps(const float *__p) 566a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 567a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang struct __loadu_ps { 568c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __m128 __v; 569a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang } __attribute__((__packed__, __may_alias__)); 570c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return ((struct __loadu_ps*)__p)->__v; 571a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 572a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 573a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 574c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_loadr_ps(const float *__p) 575a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 576c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __m128 __a = _mm_load_ps(__p); 577c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); 578a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 579a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 580a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 581c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_set_ss(float __w) 582a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 583c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return (__m128){ __w, 0, 0, 0 }; 584a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 585a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 586a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 587c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_set1_ps(float __w) 588a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 589c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return (__m128){ __w, __w, __w, __w }; 590a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 591a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 592990d2fc6a5ba9884654aa2d2bc5eda709ff17ba1Stephen Hines/* Microsoft specific. */ 593a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 594c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_set_ps1(float __w) 595a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 596c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return _mm_set1_ps(__w); 597a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 598a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 599a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 600c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_set_ps(float __z, float __y, float __x, float __w) 601a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 602c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return (__m128){ __w, __x, __y, __z }; 603a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 604a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 605a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 606c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_setr_ps(float __z, float __y, float __x, float __w) 607a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 608c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return (__m128){ __z, __y, __x, __w }; 609a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 610a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 611a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__)) 612a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang_mm_setzero_ps(void) 613a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 614a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang return (__m128){ 0, 0, 0, 0 }; 615a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 616a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 617a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ void __attribute__((__always_inline__)) 618c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_storeh_pi(__m64 *__p, __m128 __a) 619a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 620c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __builtin_ia32_storehps((__v2si *)__p, __a); 621a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 622a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 623a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ void __attribute__((__always_inline__)) 624c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_storel_pi(__m64 *__p, __m128 __a) 625a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 626c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __builtin_ia32_storelps((__v2si *)__p, __a); 627a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 628a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 629a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ void __attribute__((__always_inline__)) 630c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_store_ss(float *__p, __m128 __a) 631a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 632a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang struct __mm_store_ss_struct { 633c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines float __u; 634a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang } __attribute__((__packed__, __may_alias__)); 635c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines ((struct __mm_store_ss_struct*)__p)->__u = __a[0]; 636a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 637a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 638a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 639c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_storeu_ps(float *__p, __m128 __a) 640a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 641c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __builtin_ia32_storeups(__p, __a); 642a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 643a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 644a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 645c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_store1_ps(float *__p, __m128 __a) 646a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 647c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __a = __builtin_shufflevector(__a, __a, 0, 0, 0, 0); 648c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines _mm_storeu_ps(__p, __a); 649a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 650a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 651a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 652c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_store_ps1(float *__p, __m128 __a) 653a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 654c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return _mm_store1_ps(__p, __a); 655a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 656a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 657a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 658c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_store_ps(float *__p, __m128 __a) 659a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 660c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines *(__m128 *)__p = __a; 661a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 662a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 663a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 664c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_storer_ps(float *__p, __m128 __a) 665a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 666c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __a = __builtin_shufflevector(__a, __a, 3, 2, 1, 0); 667c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines _mm_store_ps(__p, __a); 668a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 669a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 670a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_HINT_T0 3 671a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_HINT_T1 2 672a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_HINT_T2 1 673a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_HINT_NTA 0 674a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 67530047ab0b15bddc2ae0743e8d0b60226a1da9883Stephen Hines#ifndef _MSC_VER 676a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang/* FIXME: We have to #define this because "sel" must be a constant integer, and 677a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang Sema doesn't do any form of constant propagation yet. */ 678a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 6796099914810f6f01c79e5da88794972fa4f1439c4Ying Wang#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), 0, (sel))) 68030047ab0b15bddc2ae0743e8d0b60226a1da9883Stephen Hines#endif 681a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 682a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 683c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_stream_pi(__m64 *__p, __m64 __a) 684a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 685c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __builtin_ia32_movntq(__p, __a); 686a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 687a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 688a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 689c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_stream_ps(float *__p, __m128 __a) 690a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 691c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __builtin_ia32_movntps(__p, __a); 692a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 693a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 694a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 695a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang_mm_sfence(void) 696a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 697a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang __builtin_ia32_sfence(); 698a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 699a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 700a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 701c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_extract_pi16(__m64 __a, int __n) 702a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 703c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __v4hi __b = (__v4hi)__a; 704c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return (unsigned short)__b[__n & 3]; 705a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 706a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 707a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 708c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_insert_pi16(__m64 __a, int __d, int __n) 709a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 710c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __v4hi __b = (__v4hi)__a; 711c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __b[__n & 3] = __d; 712c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return (__m64)__b; 713a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 714a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 715a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 716c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_max_pi16(__m64 __a, __m64 __b) 717a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 718c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return (__m64)__builtin_ia32_pmaxsw((__v4hi)__a, (__v4hi)__b); 719a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 720a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 721a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 722c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_max_pu8(__m64 __a, __m64 __b) 723a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 724c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return (__m64)__builtin_ia32_pmaxub((__v8qi)__a, (__v8qi)__b); 725a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 726a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 727a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 728c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_min_pi16(__m64 __a, __m64 __b) 729a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 730c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return (__m64)__builtin_ia32_pminsw((__v4hi)__a, (__v4hi)__b); 731a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 732a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 733a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 734c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_min_pu8(__m64 __a, __m64 __b) 735a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 736c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return (__m64)__builtin_ia32_pminub((__v8qi)__a, (__v8qi)__b); 737a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 738a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 739a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 740c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_movemask_pi8(__m64 __a) 741a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 742c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_ia32_pmovmskb((__v8qi)__a); 743a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 744a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 745a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 746c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_mulhi_pu16(__m64 __a, __m64 __b) 747a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 748c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return (__m64)__builtin_ia32_pmulhuw((__v4hi)__a, (__v4hi)__b); 749a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 750a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 7516099914810f6f01c79e5da88794972fa4f1439c4Ying Wang#define _mm_shuffle_pi16(a, n) __extension__ ({ \ 7526099914810f6f01c79e5da88794972fa4f1439c4Ying Wang __m64 __a = (a); \ 7536099914810f6f01c79e5da88794972fa4f1439c4Ying Wang (__m64)__builtin_ia32_pshufw((__v4hi)__a, (n)); }) 754a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 755a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 756c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_maskmove_si64(__m64 __d, __m64 __n, char *__p) 757a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 758c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __builtin_ia32_maskmovq((__v8qi)__d, (__v8qi)__n, __p); 759a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 760a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 761a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 762c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_avg_pu8(__m64 __a, __m64 __b) 763a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 764c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return (__m64)__builtin_ia32_pavgb((__v8qi)__a, (__v8qi)__b); 765a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 766a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 767a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 768c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_avg_pu16(__m64 __a, __m64 __b) 769a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 770c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return (__m64)__builtin_ia32_pavgw((__v4hi)__a, (__v4hi)__b); 771a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 772a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 773a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 774c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_sad_pu8(__m64 __a, __m64 __b) 775a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 776c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return (__m64)__builtin_ia32_psadbw((__v8qi)__a, (__v8qi)__b); 777a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 778a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 779a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) 780a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang_mm_getcsr(void) 781a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 782a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang return __builtin_ia32_stmxcsr(); 783a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 784a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 785a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 786c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_setcsr(unsigned int __i) 787a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 788c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __builtin_ia32_ldmxcsr(__i); 789a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 790a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 7916099914810f6f01c79e5da88794972fa4f1439c4Ying Wang#define _mm_shuffle_ps(a, b, mask) __extension__ ({ \ 7926099914810f6f01c79e5da88794972fa4f1439c4Ying Wang __m128 __a = (a); \ 7936099914810f6f01c79e5da88794972fa4f1439c4Ying Wang __m128 __b = (b); \ 7946099914810f6f01c79e5da88794972fa4f1439c4Ying Wang (__m128)__builtin_shufflevector((__v4sf)__a, (__v4sf)__b, \ 7956099914810f6f01c79e5da88794972fa4f1439c4Ying Wang (mask) & 0x3, ((mask) & 0xc) >> 2, \ 7966099914810f6f01c79e5da88794972fa4f1439c4Ying Wang (((mask) & 0x30) >> 4) + 4, \ 7976099914810f6f01c79e5da88794972fa4f1439c4Ying Wang (((mask) & 0xc0) >> 6) + 4); }) 798a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 799a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 800c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_unpackhi_ps(__m128 __a, __m128 __b) 801a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 802c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_shufflevector(__a, __b, 2, 6, 3, 7); 803a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 804a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 805a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 806c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_unpacklo_ps(__m128 __a, __m128 __b) 807a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 808c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_shufflevector(__a, __b, 0, 4, 1, 5); 809a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 810a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 811a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 812c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_move_ss(__m128 __a, __m128 __b) 813a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 814c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_shufflevector(__a, __b, 4, 1, 2, 3); 815a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 816a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 817a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 818c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_movehl_ps(__m128 __a, __m128 __b) 819a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 820c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_shufflevector(__a, __b, 6, 7, 2, 3); 821a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 822a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 823a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 824c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_movelh_ps(__m128 __a, __m128 __b) 825a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 826c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_shufflevector(__a, __b, 0, 1, 4, 5); 827a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 828a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 829a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 830c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cvtpi16_ps(__m64 __a) 831a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 832c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __m64 __b, __c; 833c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __m128 __r; 834a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 835c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __b = _mm_setzero_si64(); 836c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __b = _mm_cmpgt_pi16(__b, __a); 837c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __c = _mm_unpackhi_pi16(__a, __b); 838c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __r = _mm_setzero_ps(); 839c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __r = _mm_cvtpi32_ps(__r, __c); 840c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __r = _mm_movelh_ps(__r, __r); 841c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __c = _mm_unpacklo_pi16(__a, __b); 842c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __r = _mm_cvtpi32_ps(__r, __c); 843a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 844c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __r; 845a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 846a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 847a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 848c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cvtpu16_ps(__m64 __a) 849a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 850c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __m64 __b, __c; 851c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __m128 __r; 852a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 853c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __b = _mm_setzero_si64(); 854c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __c = _mm_unpackhi_pi16(__a, __b); 855c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __r = _mm_setzero_ps(); 856c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __r = _mm_cvtpi32_ps(__r, __c); 857c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __r = _mm_movelh_ps(__r, __r); 858c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __c = _mm_unpacklo_pi16(__a, __b); 859c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __r = _mm_cvtpi32_ps(__r, __c); 860a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 861c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __r; 862a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 863a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 864a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 865c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cvtpi8_ps(__m64 __a) 866a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 867c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __m64 __b; 868a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 869c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __b = _mm_setzero_si64(); 870c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __b = _mm_cmpgt_pi8(__b, __a); 871c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __b = _mm_unpacklo_pi8(__a, __b); 872a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 873c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return _mm_cvtpi16_ps(__b); 874a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 875a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 876a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 877c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cvtpu8_ps(__m64 __a) 878a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 879c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __m64 __b; 880a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 881c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __b = _mm_setzero_si64(); 882c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __b = _mm_unpacklo_pi8(__a, __b); 883a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 884c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return _mm_cvtpi16_ps(__b); 885a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 886a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 887a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 888c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cvtpi32x2_ps(__m64 __a, __m64 __b) 889a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 890c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __m128 __c; 891a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 892c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __c = _mm_setzero_ps(); 893c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __c = _mm_cvtpi32_ps(__c, __b); 894c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __c = _mm_movelh_ps(__c, __c); 895a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 896c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return _mm_cvtpi32_ps(__c, __a); 897a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 898a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 899a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 900c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cvtps_pi16(__m128 __a) 901a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 902c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __m64 __b, __c; 903a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 904c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __b = _mm_cvtps_pi32(__a); 905c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __a = _mm_movehl_ps(__a, __a); 906c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __c = _mm_cvtps_pi32(__a); 907a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 908e65db135769d1f9fa24c9e1f44edffa2f1871eb4Stephen Hines return _mm_packs_pi32(__b, __c); 909a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 910a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 911a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 912c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_cvtps_pi8(__m128 __a) 913a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 914c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __m64 __b, __c; 915a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 916c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __b = _mm_cvtps_pi16(__a); 917c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines __c = _mm_setzero_si64(); 918a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 919c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return _mm_packs_pi16(__b, __c); 920a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 921a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 922a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 923c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines_mm_movemask_ps(__m128 __a) 924a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang{ 925c6ee7dfdcb47cdfbdb2e3cb056d590f87ce3bcb1Stephen Hines return __builtin_ia32_movmskps(__a); 926a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} 927a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 928a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) 929a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 930a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_EXCEPT_INVALID (0x0001) 931a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_EXCEPT_DENORM (0x0002) 932a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_EXCEPT_DIV_ZERO (0x0004) 933a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_EXCEPT_OVERFLOW (0x0008) 934a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_EXCEPT_UNDERFLOW (0x0010) 935a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_EXCEPT_INEXACT (0x0020) 936a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_EXCEPT_MASK (0x003f) 937a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 938a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_MASK_INVALID (0x0080) 939a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_MASK_DENORM (0x0100) 940a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_MASK_DIV_ZERO (0x0200) 941a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_MASK_OVERFLOW (0x0400) 942a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_MASK_UNDERFLOW (0x0800) 943a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_MASK_INEXACT (0x1000) 944a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_MASK_MASK (0x1f80) 945a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 946a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_ROUND_NEAREST (0x0000) 947a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_ROUND_DOWN (0x2000) 948a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_ROUND_UP (0x4000) 949a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_ROUND_TOWARD_ZERO (0x6000) 950a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_ROUND_MASK (0x6000) 951a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 952a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_FLUSH_ZERO_MASK (0x8000) 953a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_FLUSH_ZERO_ON (0x8000) 9546099914810f6f01c79e5da88794972fa4f1439c4Ying Wang#define _MM_FLUSH_ZERO_OFF (0x0000) 955a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 956a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_GET_EXCEPTION_MASK() (_mm_getcsr() & _MM_MASK_MASK) 957a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_GET_EXCEPTION_STATE() (_mm_getcsr() & _MM_EXCEPT_MASK) 958a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_GET_FLUSH_ZERO_MODE() (_mm_getcsr() & _MM_FLUSH_ZERO_MASK) 959a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_GET_ROUNDING_MODE() (_mm_getcsr() & _MM_ROUND_MASK) 960a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 961a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_SET_EXCEPTION_MASK(x) (_mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | (x))) 962a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_SET_EXCEPTION_STATE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | (x))) 963a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_SET_FLUSH_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | (x))) 964a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_SET_ROUNDING_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (x))) 965a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 966a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ 967a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wangdo { \ 968a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang __m128 tmp3, tmp2, tmp1, tmp0; \ 969a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang tmp0 = _mm_unpacklo_ps((row0), (row1)); \ 970a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang tmp2 = _mm_unpacklo_ps((row2), (row3)); \ 971a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang tmp1 = _mm_unpackhi_ps((row0), (row1)); \ 972a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang tmp3 = _mm_unpackhi_ps((row2), (row3)); \ 973a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang (row0) = _mm_movelh_ps(tmp0, tmp2); \ 974a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang (row1) = _mm_movehl_ps(tmp2, tmp0); \ 975a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang (row2) = _mm_movelh_ps(tmp1, tmp3); \ 976a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang (row3) = _mm_movehl_ps(tmp3, tmp1); \ 977a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang} while (0) 978a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 979a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang/* Aliases for compatibility. */ 980a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _m_pextrw _mm_extract_pi16 981a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _m_pinsrw _mm_insert_pi16 982a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _m_pmaxsw _mm_max_pi16 983a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _m_pmaxub _mm_max_pu8 984a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _m_pminsw _mm_min_pi16 985a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _m_pminub _mm_min_pu8 986a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _m_pmovmskb _mm_movemask_pi8 987a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _m_pmulhuw _mm_mulhi_pu16 988a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _m_pshufw _mm_shuffle_pi16 989a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _m_maskmovq _mm_maskmove_si64 990a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _m_pavgb _mm_avg_pu8 991a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _m_pavgw _mm_avg_pu16 992a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _m_psadbw _mm_sad_pu8 993a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _m_ _mm_ 994a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#define _m_ _mm_ 995a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 996a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang/* Ugly hack for backwards-compatibility (compatible with gcc) */ 997a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#ifdef __SSE2__ 998a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#include <emmintrin.h> 999a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#endif 1000a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 1001a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#endif /* __SSE__ */ 1002a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang 1003a6720149b0039b3a5e5a3183c124d500f0830d38Ying Wang#endif /* __XMMINTRIN_H */ 1004