/* xmmintrin.h revision 6bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89 */
/*===---- xmmintrin.h - SSE intrinsics -------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

248d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#ifndef __XMMINTRIN_H
258d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define __XMMINTRIN_H
268d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
278d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#ifndef __SSE__
288d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#error "SSE instruction set not enabled"
298d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#else
306c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt
316c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt#include <mmintrin.h>
328d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
338d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidttypedef int __v4si __attribute__((__vector_size__(16)));
348d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidttypedef float __v4sf __attribute__((__vector_size__(16)));
358d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidttypedef float __m128 __attribute__((__vector_size__(16)));
368d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
378d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt// This header should only be included in a hosted environment as it depends on
388d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt// a standard library to provide allocation routines.
398d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#if __STDC_HOSTED__
408d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include <mm_malloc.h>
418d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#endif
428d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
438d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
448d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_add_ss(__m128 __a, __m128 __b)
458d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
468d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __a[0] += __b[0];
478d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __a;
488d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
498d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
508d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
518d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_add_ps(__m128 __a, __m128 __b)
528d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
538d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __a + __b;
548d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
558d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
568d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
578d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_sub_ss(__m128 __a, __m128 __b)
588d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
598d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __a[0] -= __b[0];
608d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __a;
618d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
628d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
638d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
648d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_sub_ps(__m128 __a, __m128 __b)
658d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
668d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __a - __b;
678d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
688d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
698d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
708d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_mul_ss(__m128 __a, __m128 __b)
718d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
728d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __a[0] *= __b[0];
738d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __a;
748d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
758d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
768d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
778d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_mul_ps(__m128 __a, __m128 __b)
788d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
798d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __a * __b;
808d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
818d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
828d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
83d5e4923d04122f81300fa68fb07d64ede28fd44dDmitry Shmidt_mm_div_ss(__m128 __a, __m128 __b)
848d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
858d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __a[0] /= __b[0];
868d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __a;
878d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
888d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
898d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
908d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_div_ps(__m128 __a, __m128 __b)
918d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
928d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __a / __b;
938d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
948d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
958d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
968d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_sqrt_ss(__m128 __a)
978d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
988d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __m128 __c = __builtin_ia32_sqrtss(__a);
998d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m128) { __c[0], __a[1], __a[2], __a[3] };
1008d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
1018d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1028d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
1038d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_sqrt_ps(__m128 __a)
1048d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
1058d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __builtin_ia32_sqrtps(__a);
1068d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
1078d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1088d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
1098d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_rcp_ss(__m128 __a)
1108d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
1118d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __m128 __c = __builtin_ia32_rcpss(__a);
1128d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m128) { __c[0], __a[1], __a[2], __a[3] };
1138d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
1148d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1158d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
1168d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_rcp_ps(__m128 __a)
1178d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
1188d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __builtin_ia32_rcpps(__a);
1198d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
1208d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1218d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
1228d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_rsqrt_ss(__m128 __a)
1238d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
1248d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __m128 __c = __builtin_ia32_rsqrtss(__a);
1258d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m128) { __c[0], __a[1], __a[2], __a[3] };
1268d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
1278d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1288d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
1298d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_rsqrt_ps(__m128 __a)
1308d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
1318d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __builtin_ia32_rsqrtps(__a);
1328d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
1338d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1348d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
1358d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_min_ss(__m128 __a, __m128 __b)
1368d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
1378d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __builtin_ia32_minss(__a, __b);
1388d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
1398d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1408d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
1418d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_min_ps(__m128 __a, __m128 __b)
1428d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
1438d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __builtin_ia32_minps(__a, __b);
1448d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
1458d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1468d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
1478d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_max_ss(__m128 __a, __m128 __b)
1488d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
1498d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __builtin_ia32_maxss(__a, __b);
1508d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
1518d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1528d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
1538d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_max_ps(__m128 __a, __m128 __b)
1548d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
1551f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt  return __builtin_ia32_maxps(__a, __b);
1561f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt}
1571f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
1581f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
1591f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_and_ps(__m128 __a, __m128 __b)
1601f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{
1611f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt  return (__m128)((__v4si)__a & (__v4si)__b);
1621f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt}
1631f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
1648d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
165d5e4923d04122f81300fa68fb07d64ede28fd44dDmitry Shmidt_mm_andnot_ps(__m128 __a, __m128 __b)
166d5e4923d04122f81300fa68fb07d64ede28fd44dDmitry Shmidt{
1671f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt  return (__m128)(~(__v4si)__a & (__v4si)__b);
1687d5c8f257a74ac0d12828962a492e8b84ef83923Dmitry Shmidt}
169fb79edc9df1f20461e90e478363d207348213d35Dmitry Shmidt
1708d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
1718d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_or_ps(__m128 __a, __m128 __b)
1728d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
1738d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m128)((__v4si)__a | (__v4si)__b);
1748d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
1758d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1768d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
1778d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_xor_ps(__m128 __a, __m128 __b)
1788d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
1798d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m128)((__v4si)__a ^ (__v4si)__b);
1808d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
1818d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1828d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
1838d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cmpeq_ss(__m128 __a, __m128 __b)
1848d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
1858d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m128)__builtin_ia32_cmpss(__a, __b, 0);
1868d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
1878d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1888d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
1898d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cmpeq_ps(__m128 __a, __m128 __b)
1908d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
1918d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m128)__builtin_ia32_cmpps(__a, __b, 0);
1928d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
1938d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1948d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
1958d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cmplt_ss(__m128 __a, __m128 __b)
1968d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
1978d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m128)__builtin_ia32_cmpss(__a, __b, 1);
1988d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
1998d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
2008d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
2016c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt_mm_cmplt_ps(__m128 __a, __m128 __b)
2026c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt{
2036c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt  return (__m128)__builtin_ia32_cmpps(__a, __b, 1);
2046c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt}
2056c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt
2066c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
2076c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt_mm_cmple_ss(__m128 __a, __m128 __b)
2086c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt{
2096c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt  return (__m128)__builtin_ia32_cmpss(__a, __b, 2);
2106c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt}
2118d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
2128d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
2138d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cmple_ps(__m128 __a, __m128 __b)
2148d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
2158d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m128)__builtin_ia32_cmpps(__a, __b, 2);
2168d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
2178d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
2188d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
2198d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cmpgt_ss(__m128 __a, __m128 __b)
2208d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
2218d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m128)__builtin_shufflevector(__a,
222051af73b8f8014eff33330aead0f36944b3403e6Dmitry Shmidt                                         __builtin_ia32_cmpss(__b, __a, 1),
2238d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt                                         4, 1, 2, 3);
2248d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
2258d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
2268d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
2278d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cmpgt_ps(__m128 __a, __m128 __b)
2288d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
2298d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m128)__builtin_ia32_cmpps(__b, __a, 1);
2308d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
2318d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
2328d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
2338d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cmpge_ss(__m128 __a, __m128 __b)
2348d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
2358d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m128)__builtin_shufflevector(__a,
2368d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt                                         __builtin_ia32_cmpss(__b, __a, 2),
2378d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt                                         4, 1, 2, 3);
238f86232838cf712377867cb42417c1613ab5dc425Dmitry Shmidt}
2398d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
2408d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
241f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt_mm_cmpge_ps(__m128 __a, __m128 __b)
2428d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
2438d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m128)__builtin_ia32_cmpps(__b, __a, 2);
2448d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
2458d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
246344abd362cfe2d03ed956666527352826b67bde5Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
2478d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cmpneq_ss(__m128 __a, __m128 __b)
2488d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
249d5e4923d04122f81300fa68fb07d64ede28fd44dDmitry Shmidt  return (__m128)__builtin_ia32_cmpss(__a, __b, 4);
2501f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt}
2516c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt
2528d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
2538d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cmpneq_ps(__m128 __a, __m128 __b)
2548d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
2558d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m128)__builtin_ia32_cmpps(__a, __b, 4);
256a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt}
25704949598a23f501be6eec21697465fd46a28840aDmitry Shmidt
25804949598a23f501be6eec21697465fd46a28840aDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
25904949598a23f501be6eec21697465fd46a28840aDmitry Shmidt_mm_cmpnlt_ss(__m128 __a, __m128 __b)
26004949598a23f501be6eec21697465fd46a28840aDmitry Shmidt{
2611f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt  return (__m128)__builtin_ia32_cmpss(__a, __b, 5);
2628d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
2631f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
2648d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
265051af73b8f8014eff33330aead0f36944b3403e6Dmitry Shmidt_mm_cmpnlt_ps(__m128 __a, __m128 __b)
2661f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{
2676c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt  return (__m128)__builtin_ia32_cmpps(__a, __b, 5);
2686c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt}
2696c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt
2708d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
2716c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt_mm_cmpnle_ss(__m128 __a, __m128 __b)
2726c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt{
273d5e4923d04122f81300fa68fb07d64ede28fd44dDmitry Shmidt  return (__m128)__builtin_ia32_cmpss(__a, __b, 6);
27404949598a23f501be6eec21697465fd46a28840aDmitry Shmidt}
27504949598a23f501be6eec21697465fd46a28840aDmitry Shmidt
27604949598a23f501be6eec21697465fd46a28840aDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
27704949598a23f501be6eec21697465fd46a28840aDmitry Shmidt_mm_cmpnle_ps(__m128 __a, __m128 __b)
27804949598a23f501be6eec21697465fd46a28840aDmitry Shmidt{
27904949598a23f501be6eec21697465fd46a28840aDmitry Shmidt  return (__m128)__builtin_ia32_cmpps(__a, __b, 6);
28004949598a23f501be6eec21697465fd46a28840aDmitry Shmidt}
28104949598a23f501be6eec21697465fd46a28840aDmitry Shmidt
28204949598a23f501be6eec21697465fd46a28840aDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
2838d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cmpngt_ss(__m128 __a, __m128 __b)
2848d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
2858d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m128)__builtin_shufflevector(__a,
2868d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt                                         __builtin_ia32_cmpss(__b, __a, 5),
2878d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt                                         4, 1, 2, 3);
2888d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
2898d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
2908d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
2918d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cmpngt_ps(__m128 __a, __m128 __b)
2928d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
2938d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m128)__builtin_ia32_cmpps(__b, __a, 5);
2948d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
2958d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
2961846323989242844f0e857458a8939fa5836429cDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
2971f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_cmpnge_ss(__m128 __a, __m128 __b)
2981f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{
2998d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m128)__builtin_shufflevector(__a,
3006c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt                                         __builtin_ia32_cmpss(__b, __a, 6),
3018d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt                                         4, 1, 2, 3);
3028d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
3038d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
3048d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
30504949598a23f501be6eec21697465fd46a28840aDmitry Shmidt_mm_cmpnge_ps(__m128 __a, __m128 __b)
3068d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
3078d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m128)__builtin_ia32_cmpps(__b, __a, 6);
3088d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
3098d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
3108d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
3111f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_cmpord_ss(__m128 __a, __m128 __b)
3128d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
3131846323989242844f0e857458a8939fa5836429cDmitry Shmidt  return (__m128)__builtin_ia32_cmpss(__a, __b, 7);
3141846323989242844f0e857458a8939fa5836429cDmitry Shmidt}
3151846323989242844f0e857458a8939fa5836429cDmitry Shmidt
3161846323989242844f0e857458a8939fa5836429cDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
3171846323989242844f0e857458a8939fa5836429cDmitry Shmidt_mm_cmpord_ps(__m128 __a, __m128 __b)
3181846323989242844f0e857458a8939fa5836429cDmitry Shmidt{
3191846323989242844f0e857458a8939fa5836429cDmitry Shmidt  return (__m128)__builtin_ia32_cmpps(__a, __b, 7);
3201846323989242844f0e857458a8939fa5836429cDmitry Shmidt}
3211846323989242844f0e857458a8939fa5836429cDmitry Shmidt
3221846323989242844f0e857458a8939fa5836429cDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
3231846323989242844f0e857458a8939fa5836429cDmitry Shmidt_mm_cmpunord_ss(__m128 __a, __m128 __b)
3241846323989242844f0e857458a8939fa5836429cDmitry Shmidt{
3251846323989242844f0e857458a8939fa5836429cDmitry Shmidt  return (__m128)__builtin_ia32_cmpss(__a, __b, 3);
3268d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
3278d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
3288d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
3298d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cmpunord_ps(__m128 __a, __m128 __b)
3308d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
3318d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m128)__builtin_ia32_cmpps(__a, __b, 3);
3328d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
3338d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
3348d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__))
3358d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_comieq_ss(__m128 __a, __m128 __b)
3368d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
3378d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __builtin_ia32_comieq(__a, __b);
3388d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
3398d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
3408d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__))
3418d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_comilt_ss(__m128 __a, __m128 __b)
3428d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
3438d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __builtin_ia32_comilt(__a, __b);
3448d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
3456c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt
3466c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__))
3476c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt_mm_comile_ss(__m128 __a, __m128 __b)
3486c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt{
3496c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt  return __builtin_ia32_comile(__a, __b);
3506c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt}
3516c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt
3526c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__))
3536c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt_mm_comigt_ss(__m128 __a, __m128 __b)
3546c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt{
3556c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt  return __builtin_ia32_comigt(__a, __b);
3566c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt}
3576c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt
3588d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__))
3598d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_comige_ss(__m128 __a, __m128 __b)
3608d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
3618d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __builtin_ia32_comige(__a, __b);
3628d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
3631f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
3641f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__))
3651f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_comineq_ss(__m128 __a, __m128 __b)
3661f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{
3671f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt  return __builtin_ia32_comineq(__a, __b);
3681f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt}
3691f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
3701f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__))
3711f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_ucomieq_ss(__m128 __a, __m128 __b)
3721f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{
3731f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt  return __builtin_ia32_ucomieq(__a, __b);
3741f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt}
3751f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
3761f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__))
3771f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_ucomilt_ss(__m128 __a, __m128 __b)
3781f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{
3791f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt  return __builtin_ia32_ucomilt(__a, __b);
3808d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
3811f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
3828d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__))
3838d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_ucomile_ss(__m128 __a, __m128 __b)
3848d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
3858d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __builtin_ia32_ucomile(__a, __b);
3868d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
3878d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
3881f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__))
3891f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_ucomigt_ss(__m128 __a, __m128 __b)
3901f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{
3911f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt  return __builtin_ia32_ucomigt(__a, __b);
3921f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt}
3931f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
3941f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__))
3951f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_ucomige_ss(__m128 __a, __m128 __b)
3961f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{
3971f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt  return __builtin_ia32_ucomige(__a, __b);
3981f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt}
3991f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
4001f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__))
4011f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_ucomineq_ss(__m128 __a, __m128 __b)
4021f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{
4031f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt  return __builtin_ia32_ucomineq(__a, __b);
4041f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt}
4051f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
4068d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__))
4078d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvtss_si32(__m128 __a)
4081f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{
4091f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt  return __builtin_ia32_cvtss2si(__a);
4101f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt}
4111f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
4121f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__))
4131f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_cvt_ss2si(__m128 __a)
4141f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{
4151f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt  return _mm_cvtss_si32(__a);
4161f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt}
4171f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
4181f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt#ifdef __x86_64__
4191f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
4201f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ long long __attribute__((__always_inline__, __nodebug__))
4211f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_cvtss_si64(__m128 __a)
4221f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{
4231f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt  return __builtin_ia32_cvtss2si64(__a);
4241f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt}
4251f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
4261f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt#endif
4271f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
4281f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
4291f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_cvtps_pi32(__m128 __a)
4301f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{
4311f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt  return (__m64)__builtin_ia32_cvtps2pi(__a);
4321f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt}
4331f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
4341f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
4351f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_cvt_ps2pi(__m128 __a)
4361f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{
4371f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt  return _mm_cvtps_pi32(__a);
4388d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
4398d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
4408d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__))
4418d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvttss_si32(__m128 __a)
4428d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
4438d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __a[0];
4448d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
4458d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
4468d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__))
4478d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvtt_ss2si(__m128 __a)
4488d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
4498d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return _mm_cvttss_si32(__a);
4508d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
4518d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
4528d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ long long __attribute__((__always_inline__, __nodebug__))
4538d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvttss_si64(__m128 __a)
4548d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
4558d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __a[0];
4568d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
4578d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
4588d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
4598d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvttps_pi32(__m128 __a)
4608d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
4618d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m64)__builtin_ia32_cvttps2pi(__a);
4628d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
4638d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
4648d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
4658d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvtt_ps2pi(__m128 __a)
4668d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
4678d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return _mm_cvttps_pi32(__a);
4688d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
4698d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
4708d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
4718d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvtsi32_ss(__m128 __a, int __b)
4728d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
4738d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __a[0] = __b;
4748d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __a;
4758347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt}
4768d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
4778d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
4788d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvt_si2ss(__m128 __a, int __b)
4798347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt{
4808d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return _mm_cvtsi32_ss(__a, __b);
4818d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
4828d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
4838d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#ifdef __x86_64__
4848d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
4858347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
4868d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvtsi64_ss(__m128 __a, long long __b)
4878d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
4888d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __a[0] = __b;
4898d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __a;
4908d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
4918d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
4928347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt#endif
4938d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
4948d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
4958d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvtpi32_ps(__m128 __a, __m64 __b)
4968d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
4978d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __builtin_ia32_cvtpi2ps(__a, (__v2si)__b);
4988d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
4998347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt
5008d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
5018d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvt_pi2ps(__m128 __a, __m64 __b)
5028d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
5038347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt  return _mm_cvtpi32_ps(__a, __b);
5048d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
5058d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
5068d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ float __attribute__((__always_inline__, __nodebug__))
5078d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvtss_f32(__m128 __a)
5088d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
5098d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __a[0];
5108d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
5118347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt
5128d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
5138d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_loadh_pi(__m128 __a, const __m64 *__p)
5148d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
5158d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  typedef float __mm_loadh_pi_v2f32 __attribute__((__vector_size__(8)));
5168d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  struct __mm_loadh_pi_struct {
5178d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    __mm_loadh_pi_v2f32 __u;
5188d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  } __attribute__((__packed__, __may_alias__));
5198d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __mm_loadh_pi_v2f32 __b = ((struct __mm_loadh_pi_struct*)__p)->__u;
5208d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1);
5218d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5);
5228d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
5238347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt
5248d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
5258d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_loadl_pi(__m128 __a, const __m64 *__p)
5268d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
5278d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  typedef float __mm_loadl_pi_v2f32 __attribute__((__vector_size__(8)));
5288d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  struct __mm_loadl_pi_struct {
5298d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    __mm_loadl_pi_v2f32 __u;
5308d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  } __attribute__((__packed__, __may_alias__));
5318d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __mm_loadl_pi_v2f32 __b = ((struct __mm_loadl_pi_struct*)__p)->__u;
5328347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt  __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1);
5338d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3);
5348d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
5358d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
5368d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
5378d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_load_ss(const float *__p)
5388d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
5398d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  struct __mm_load_ss_struct {
5408d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    float __u;
5418d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  } __attribute__((__packed__, __may_alias__));
5428d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  float __u = ((struct __mm_load_ss_struct*)__p)->__u;
5438d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m128){ __u, 0, 0, 0 };
5448d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
5458d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
5468347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
5478d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_load1_ps(const float *__p)
5488d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
5498d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  struct __mm_load1_ps_struct {
5508d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    float __u;
5518d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  } __attribute__((__packed__, __may_alias__));
5528d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  float __u = ((struct __mm_load1_ps_struct*)__p)->__u;
5538347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt  return (__m128){ __u, __u, __u, __u };
5548d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
5558d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
/* Alternate spelling of _mm_load1_ps. */
#define _mm_load_ps1(p) _mm_load1_ps(p)
5578d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
5588d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
5598d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_load_ps(const float *__p)
5608d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
5618d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return *(__m128*)__p;
5628d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
5638d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
5648d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
56504949598a23f501be6eec21697465fd46a28840aDmitry Shmidt_mm_loadu_ps(const float *__p)
5668347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt{
56704949598a23f501be6eec21697465fd46a28840aDmitry Shmidt  struct __loadu_ps {
56804949598a23f501be6eec21697465fd46a28840aDmitry Shmidt    __m128 __v;
56904949598a23f501be6eec21697465fd46a28840aDmitry Shmidt  } __attribute__((__packed__, __may_alias__));
57004949598a23f501be6eec21697465fd46a28840aDmitry Shmidt  return ((struct __loadu_ps*)__p)->__v;
57104949598a23f501be6eec21697465fd46a28840aDmitry Shmidt}
5728347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt
57304949598a23f501be6eec21697465fd46a28840aDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
57404949598a23f501be6eec21697465fd46a28840aDmitry Shmidt_mm_loadr_ps(const float *__p)
57504949598a23f501be6eec21697465fd46a28840aDmitry Shmidt{
5768347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt  __m128 __a = _mm_load_ps(__p);
5778d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __builtin_shufflevector(__a, __a, 3, 2, 1, 0);
5788d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
5798d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
5808d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
5818d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_set_ss(float __w)
5828347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt{
5838d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m128){ __w, 0, 0, 0 };
5841f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt}
5851f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
5861f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
5871f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_set1_ps(float __w)
5881f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{
5891f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt  return (__m128){ __w, __w, __w, __w };
5901f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt}
5911f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
5921f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt// Microsoft specific.
5931f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
5948347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt_mm_set_ps1(float __w)
5951f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{
596a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt    return _mm_set1_ps(__w);
597a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt}
598a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt
599a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
600a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt_mm_set_ps(float __z, float __y, float __x, float __w)
601a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt{
602a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt  return (__m128){ __w, __x, __y, __z };
603a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt}
6048347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt
605a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
60644c957860ca714a86357591f39aff0bfa904c743Dmitry Shmidt_mm_setr_ps(float __z, float __y, float __x, float __w)
60744c957860ca714a86357591f39aff0bfa904c743Dmitry Shmidt{
60844c957860ca714a86357591f39aff0bfa904c743Dmitry Shmidt  return (__m128){ __z, __y, __x, __w };
60944c957860ca714a86357591f39aff0bfa904c743Dmitry Shmidt}
61044c957860ca714a86357591f39aff0bfa904c743Dmitry Shmidt
61144c957860ca714a86357591f39aff0bfa904c743Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__))
6128347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt_mm_setzero_ps(void)
61344c957860ca714a86357591f39aff0bfa904c743Dmitry Shmidt{
6146c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt  return (__m128){ 0, 0, 0, 0 };
6156c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt}
6168347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt
6176c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidtstatic __inline__ void __attribute__((__always_inline__))
6188d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_storeh_pi(__m64 *__p, __m128 __a)
6198d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
6208d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __builtin_ia32_storehps((__v2si *)__p, __a);
6218d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
6228d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
6238d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ void __attribute__((__always_inline__))
624c5ec7f57ead87efa365800228aa0b09a12d9e6c4Dmitry Shmidt_mm_storel_pi(__m64 *__p, __m128 __a)
625c5ec7f57ead87efa365800228aa0b09a12d9e6c4Dmitry Shmidt{
626c5ec7f57ead87efa365800228aa0b09a12d9e6c4Dmitry Shmidt  __builtin_ia32_storelps((__v2si *)__p, __a);
627f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt}
6288d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
6298d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ void __attribute__((__always_inline__))
630f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt_mm_store_ss(float *__p, __m128 __a)
631f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt{
632f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt  struct __mm_store_ss_struct {
6338d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    float __u;
6348d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  } __attribute__((__packed__, __may_alias__));
6358d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  ((struct __mm_store_ss_struct*)__p)->__u = __a[0];
6368d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
6378d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
6388d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ void __attribute__((__always_inline__, __nodebug__))
6398d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_storeu_ps(float *__p, __m128 __a)
640f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt{
6418d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __builtin_ia32_storeups(__p, __a);
642f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt}
643f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt
644f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidtstatic __inline__ void __attribute__((__always_inline__, __nodebug__))
645f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt_mm_store1_ps(float *__p, __m128 __a)
646f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt{
647f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt  __a = __builtin_shufflevector(__a, __a, 0, 0, 0, 0);
6488d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  _mm_storeu_ps(__p, __a);
6498d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
65004949598a23f501be6eec21697465fd46a28840aDmitry Shmidt
6515460547a121207cf7a99eac45e05fcdd83be3161Dmitry Shmidtstatic __inline__ void __attribute__((__always_inline__, __nodebug__))
6525460547a121207cf7a99eac45e05fcdd83be3161Dmitry Shmidt_mm_store_ps1(float *__p, __m128 __a)
6535460547a121207cf7a99eac45e05fcdd83be3161Dmitry Shmidt{
6545460547a121207cf7a99eac45e05fcdd83be3161Dmitry Shmidt    return _mm_store1_ps(__p, __a);
6555460547a121207cf7a99eac45e05fcdd83be3161Dmitry Shmidt}
6565460547a121207cf7a99eac45e05fcdd83be3161Dmitry Shmidt
6575460547a121207cf7a99eac45e05fcdd83be3161Dmitry Shmidtstatic __inline__ void __attribute__((__always_inline__, __nodebug__))
6585460547a121207cf7a99eac45e05fcdd83be3161Dmitry Shmidt_mm_store_ps(float *__p, __m128 __a)
6595460547a121207cf7a99eac45e05fcdd83be3161Dmitry Shmidt{
6605460547a121207cf7a99eac45e05fcdd83be3161Dmitry Shmidt  *(__m128 *)__p = __a;
6615460547a121207cf7a99eac45e05fcdd83be3161Dmitry Shmidt}
66204949598a23f501be6eec21697465fd46a28840aDmitry Shmidt
66304949598a23f501be6eec21697465fd46a28840aDmitry Shmidtstatic __inline__ void __attribute__((__always_inline__, __nodebug__))
6647a5e50a0554bee77a9da492ea3d86f46147f1671Dmitry Shmidt_mm_storer_ps(float *__p, __m128 __a)
6657a5e50a0554bee77a9da492ea3d86f46147f1671Dmitry Shmidt{
6667a5e50a0554bee77a9da492ea3d86f46147f1671Dmitry Shmidt  __a = __builtin_shufflevector(__a, __a, 3, 2, 1, 0);
6677a5e50a0554bee77a9da492ea3d86f46147f1671Dmitry Shmidt  _mm_store_ps(__p, __a);
6687a5e50a0554bee77a9da492ea3d86f46147f1671Dmitry Shmidt}
6697a5e50a0554bee77a9da492ea3d86f46147f1671Dmitry Shmidt
/* Hint selectors intended for the second argument of _mm_prefetch, listed in
   ascending numeric order. */
#define _MM_HINT_NTA 0
#define _MM_HINT_T2  1
#define _MM_HINT_T1  2
#define _MM_HINT_T0  3
67404949598a23f501be6eec21697465fd46a28840aDmitry Shmidt
#ifndef _MSC_VER
/* FIXME: We have to #define this because "sel" must be a constant integer, and
   Sema doesn't do any form of constant propagation yet. */

/* Issues a read prefetch (rw argument fixed at 0) for address `a`, passing
   `sel` as the locality argument of __builtin_prefetch.  The address is cast
   to `const void *`, the parameter type __builtin_prefetch takes, so pointers
   to const objects are accepted without discarding their qualifier. */
#define _mm_prefetch(a, sel) (__builtin_prefetch((const void *)(a), 0, (sel)))
#endif
6816c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt
6826c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidtstatic __inline__ void __attribute__((__always_inline__, __nodebug__))
6836c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt_mm_stream_pi(__m64 *__p, __m64 __a)
6846c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt{
6856c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt  __builtin_ia32_movntq(__p, __a);
6866c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt}
6876c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt
6888d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ void __attribute__((__always_inline__, __nodebug__))
6898d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_stream_ps(float *__p, __m128 __a)
6908d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
6918d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __builtin_ia32_movntps(__p, __a);
6928d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
6938d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
/* Emits a store fence by invoking the SFENCE builtin; takes no arguments and
   returns nothing. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_sfence(void)
{
  __builtin_ia32_sfence();
}
6998d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
7008d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__))
7018d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_extract_pi16(__m64 __a, int __n)
7028d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
7038d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __v4hi __b = (__v4hi)__a;
7048d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (unsigned short)__b[__n & 3];
7058d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
7068d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
7078d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
7088d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_insert_pi16(__m64 __a, int __d, int __n)
7098d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
7108d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt   __v4hi __b = (__v4hi)__a;
7118d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt   __b[__n & 3] = __d;
7128d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt   return (__m64)__b;
7138d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
7148d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
7158d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
716f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt_mm_max_pi16(__m64 __a, __m64 __b)
7178d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
7188d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m64)__builtin_ia32_pmaxsw((__v4hi)__a, (__v4hi)__b);
7198d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
720f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt
7218d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
722f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt_mm_max_pu8(__m64 __a, __m64 __b)
7238d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
724f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt  return (__m64)__builtin_ia32_pmaxub((__v8qi)__a, (__v8qi)__b);
725f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt}
7268d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
727f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
7288d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_min_pi16(__m64 __a, __m64 __b)
729f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt{
730f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt  return (__m64)__builtin_ia32_pminsw((__v4hi)__a, (__v4hi)__b);
731f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt}
732f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt
733f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
734f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt_mm_min_pu8(__m64 __a, __m64 __b)
735f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt{
736f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt  return (__m64)__builtin_ia32_pminub((__v8qi)__a, (__v8qi)__b);
737f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt}
738f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt
739f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__))
740f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt_mm_movemask_pi8(__m64 __a)
741f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt{
742f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt  return __builtin_ia32_pmovmskb((__v8qi)__a);
743f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt}
744f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt
745f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
746f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt_mm_mulhi_pu16(__m64 __a, __m64 __b)
747f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt{
748f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt  return (__m64)__builtin_ia32_pmulhuw((__v4hi)__a, (__v4hi)__b);
749f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt}
750f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt
/* Shuffles the four 16-bit lanes of `a` with the PSHUFW builtin, using `n`
   as the selector.  Written as a statement-expression macro so that `n` is
   handed to the builtin unchanged; `a` is evaluated exactly once. */
#define _mm_shuffle_pi16(a, n) \
  __extension__ ({ \
    __m64 __a = (a); \
    (__m64)__builtin_ia32_pshufw((__v4hi)__a, (n)); \
  })
754f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt
755f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidtstatic __inline__ void __attribute__((__always_inline__, __nodebug__))
756f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt_mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)
757f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt{
758f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt  __builtin_ia32_maskmovq((__v8qi)__d, (__v8qi)__n, __p);
759f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt}
760f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt
761f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
762f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt_mm_avg_pu8(__m64 __a, __m64 __b)
763f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt{
764f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt  return (__m64)__builtin_ia32_pavgb((__v8qi)__a, (__v8qi)__b);
765f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt}
766f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt
7678d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
7688d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_avg_pu16(__m64 __a, __m64 __b)
7698d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
770f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt  return (__m64)__builtin_ia32_pavgw((__v4hi)__a, (__v4hi)__b);
7718d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
772f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt
773f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
774f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt_mm_sad_pu8(__m64 __a, __m64 __b)
775f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt{
776f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt  return (__m64)__builtin_ia32_psadbw((__v8qi)__a, (__v8qi)__b);
777f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt}
778f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt
/*
 * Reads the SSE control/status register (MXCSR).
 *
 * Returns the unsigned int produced by the STMXCSR builtin. The _MM_GET_*
 * macros in this header mask individual fields out of this value.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
_mm_getcsr(void)
{
  return __builtin_ia32_stmxcsr();
}
784f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt
/*
 * Writes the SSE control/status register (MXCSR).
 *
 * __i: the complete new register value; it is handed to the LDMXCSR builtin
 *      as-is, so callers that want to change a single field must merge it
 *      with the current value themselves (the _MM_SET_* macros do that).
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_setcsr(unsigned int __i)
{
  __builtin_ia32_ldmxcsr(__i);
}
790f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt
/*
 * Builds a 4-float vector from lanes of a and b chosen by an 8-bit mask.
 *
 *   result lane 0 = a[mask bits 1:0]
 *   result lane 1 = a[mask bits 3:2]
 *   result lane 2 = b[mask bits 5:4]
 *   result lane 3 = b[mask bits 7:6]
 *
 * The "+ 4" on the last two indices moves them into the second operand of
 * __builtin_shufflevector. a and b are copied into locals and therefore
 * evaluated once each; mask is expanded four times and must be usable as a
 * shufflevector index.
 */
#define _mm_shuffle_ps(a, b, mask) __extension__ ({ \
  __m128 __a = (a); \
  __m128 __b = (b); \
  (__m128)__builtin_shufflevector((__v4sf)__a, (__v4sf)__b, \
                                  (mask) & 0x3, ((mask) & 0xc) >> 2, \
                                  (((mask) & 0x30) >> 4) + 4, \
                                  (((mask) & 0xc0) >> 6) + 4); })
7988d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
7998d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
80004949598a23f501be6eec21697465fd46a28840aDmitry Shmidt_mm_unpackhi_ps(__m128 __a, __m128 __b)
80104949598a23f501be6eec21697465fd46a28840aDmitry Shmidt{
80204949598a23f501be6eec21697465fd46a28840aDmitry Shmidt  return __builtin_shufflevector(__a, __b, 2, 6, 3, 7);
80368d0e3ed07847339aedfac8e02f50db68c702e52Dmitry Shmidt}
8046c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt
80504949598a23f501be6eec21697465fd46a28840aDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
80604949598a23f501be6eec21697465fd46a28840aDmitry Shmidt_mm_unpacklo_ps(__m128 __a, __m128 __b)
80768d0e3ed07847339aedfac8e02f50db68c702e52Dmitry Shmidt{
80804949598a23f501be6eec21697465fd46a28840aDmitry Shmidt  return __builtin_shufflevector(__a, __b, 0, 4, 1, 5);
80904949598a23f501be6eec21697465fd46a28840aDmitry Shmidt}
81004949598a23f501be6eec21697465fd46a28840aDmitry Shmidt
81104949598a23f501be6eec21697465fd46a28840aDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
81204949598a23f501be6eec21697465fd46a28840aDmitry Shmidt_mm_move_ss(__m128 __a, __m128 __b)
81304949598a23f501be6eec21697465fd46a28840aDmitry Shmidt{
81404949598a23f501be6eec21697465fd46a28840aDmitry Shmidt  return __builtin_shufflevector(__a, __b, 4, 1, 2, 3);
81504949598a23f501be6eec21697465fd46a28840aDmitry Shmidt}
81668d0e3ed07847339aedfac8e02f50db68c702e52Dmitry Shmidt
81768d0e3ed07847339aedfac8e02f50db68c702e52Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
8186c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt_mm_movehl_ps(__m128 __a, __m128 __b)
81904949598a23f501be6eec21697465fd46a28840aDmitry Shmidt{
82004949598a23f501be6eec21697465fd46a28840aDmitry Shmidt  return __builtin_shufflevector(__a, __b, 6, 7, 2, 3);
82168d0e3ed07847339aedfac8e02f50db68c702e52Dmitry Shmidt}
82268d0e3ed07847339aedfac8e02f50db68c702e52Dmitry Shmidt
8236c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
82468d0e3ed07847339aedfac8e02f50db68c702e52Dmitry Shmidt_mm_movelh_ps(__m128 __a, __m128 __b)
82568d0e3ed07847339aedfac8e02f50db68c702e52Dmitry Shmidt{
82668d0e3ed07847339aedfac8e02f50db68c702e52Dmitry Shmidt  return __builtin_shufflevector(__a, __b, 0, 1, 4, 5);
8276c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt}
82868d0e3ed07847339aedfac8e02f50db68c702e52Dmitry Shmidt
82904949598a23f501be6eec21697465fd46a28840aDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
83004949598a23f501be6eec21697465fd46a28840aDmitry Shmidt_mm_cvtpi16_ps(__m64 __a)
83104949598a23f501be6eec21697465fd46a28840aDmitry Shmidt{
83204949598a23f501be6eec21697465fd46a28840aDmitry Shmidt  __m64 __b, __c;
833a38abf9af7bec7e89dbfb39ac7bb77223fe47c72Dmitry Shmidt  __m128 __r;
834a38abf9af7bec7e89dbfb39ac7bb77223fe47c72Dmitry Shmidt
835a38abf9af7bec7e89dbfb39ac7bb77223fe47c72Dmitry Shmidt  __b = _mm_setzero_si64();
836a38abf9af7bec7e89dbfb39ac7bb77223fe47c72Dmitry Shmidt  __b = _mm_cmpgt_pi16(__b, __a);
837a38abf9af7bec7e89dbfb39ac7bb77223fe47c72Dmitry Shmidt  __c = _mm_unpackhi_pi16(__a, __b);
838a38abf9af7bec7e89dbfb39ac7bb77223fe47c72Dmitry Shmidt  __r = _mm_setzero_ps();
839a38abf9af7bec7e89dbfb39ac7bb77223fe47c72Dmitry Shmidt  __r = _mm_cvtpi32_ps(__r, __c);
840a38abf9af7bec7e89dbfb39ac7bb77223fe47c72Dmitry Shmidt  __r = _mm_movelh_ps(__r, __r);
84168d0e3ed07847339aedfac8e02f50db68c702e52Dmitry Shmidt  __c = _mm_unpacklo_pi16(__a, __b);
8426c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt  __r = _mm_cvtpi32_ps(__r, __c);
84304949598a23f501be6eec21697465fd46a28840aDmitry Shmidt
84404949598a23f501be6eec21697465fd46a28840aDmitry Shmidt  return __r;
84504949598a23f501be6eec21697465fd46a28840aDmitry Shmidt}
84604949598a23f501be6eec21697465fd46a28840aDmitry Shmidt
84704949598a23f501be6eec21697465fd46a28840aDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
848bd14a57187b024f49f5b9ace55ef457d8d04650aDmitry Shmidt_mm_cvtpu16_ps(__m64 __a)
849bd14a57187b024f49f5b9ace55ef457d8d04650aDmitry Shmidt{
850bd14a57187b024f49f5b9ace55ef457d8d04650aDmitry Shmidt  __m64 __b, __c;
851bd14a57187b024f49f5b9ace55ef457d8d04650aDmitry Shmidt  __m128 __r;
852bd14a57187b024f49f5b9ace55ef457d8d04650aDmitry Shmidt
853bd14a57187b024f49f5b9ace55ef457d8d04650aDmitry Shmidt  __b = _mm_setzero_si64();
854bd14a57187b024f49f5b9ace55ef457d8d04650aDmitry Shmidt  __c = _mm_unpackhi_pi16(__a, __b);
855a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt  __r = _mm_setzero_ps();
856a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt  __r = _mm_cvtpi32_ps(__r, __c);
857a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt  __r = _mm_movelh_ps(__r, __r);
858a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt  __c = _mm_unpacklo_pi16(__a, __b);
859a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt  __r = _mm_cvtpi32_ps(__r, __c);
860a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt
8618d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __r;
8628d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
8638d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
8642e67f06149ff649fb6f8782bad041d3d9124685eDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
8658d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvtpi8_ps(__m64 __a)
8668d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
8678d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __m64 __b;
86861d9df3e62aaa0e87ad05452fcb95142159a17b6Dmitry Shmidt
86961d9df3e62aaa0e87ad05452fcb95142159a17b6Dmitry Shmidt  __b = _mm_setzero_si64();
87004949598a23f501be6eec21697465fd46a28840aDmitry Shmidt  __b = _mm_cmpgt_pi8(__b, __a);
871f21452aea786ac056eb01f1cbba4f553bd502747Dmitry Shmidt  __b = _mm_unpacklo_pi8(__a, __b);
8728d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
8738d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return _mm_cvtpi16_ps(__b);
8748d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
8758d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
8768d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
8778d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvtpu8_ps(__m64 __a)
8788d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
8798d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __m64 __b;
8808d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
8818d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __b = _mm_setzero_si64();
8828d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __b = _mm_unpacklo_pi8(__a, __b);
8838d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
8848d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return _mm_cvtpi16_ps(__b);
8858d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
8868d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
8878d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
8888d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvtpi32x2_ps(__m64 __a, __m64 __b)
8898d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
8908d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __m128 __c;
8918d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
8928d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __c = _mm_setzero_ps();
8938d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __c = _mm_cvtpi32_ps(__c, __b);
8948d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __c = _mm_movelh_ps(__c, __c);
8958d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
8968d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return _mm_cvtpi32_ps(__c, __a);
8978d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
8988d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
8998d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
9008d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvtps_pi16(__m128 __a)
9018d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
9028d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __m64 __b, __c;
9038d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
9048d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __b = _mm_cvtps_pi32(__a);
9058d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __a = _mm_movehl_ps(__a, __a);
9068d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __c = _mm_cvtps_pi32(__a);
9078d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
9088d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return _mm_packs_pi32(__b, __c);
9098d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
9106c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt
9116c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
9121f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_cvtps_pi8(__m128 __a)
9131f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{
9141f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt  __m64 __b, __c;
9151f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
9168d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __b = _mm_cvtps_pi16(__a);
9178d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  __c = _mm_setzero_si64();
9188d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
9198d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return _mm_packs_pi16(__b, __c);
9208d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
9218d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
9228d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__))
9238d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_movemask_ps(__m128 __a)
9248d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
9258d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return __builtin_ia32_movmskps(__a);
9268d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
9278d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
/*
 * Packs four 2-bit lane selectors into one shuffle mask:
 * z -> bits 7:6, y -> bits 5:4, x -> bits 3:2, w -> bits 1:0.
 * The arguments are not range-checked or masked; each is evaluated once.
 */
#define _MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))
9298d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
/*
 * Bit definitions for the value read by _mm_getcsr() and written by
 * _mm_setcsr(). Each group ends with a *_MASK constant that covers every
 * bit of the group.
 */

/* Exception state flags (bits 0-5). */
#define _MM_EXCEPT_INVALID    (0x0001)
#define _MM_EXCEPT_DENORM     (0x0002)
#define _MM_EXCEPT_DIV_ZERO   (0x0004)
#define _MM_EXCEPT_OVERFLOW   (0x0008)
#define _MM_EXCEPT_UNDERFLOW  (0x0010)
#define _MM_EXCEPT_INEXACT    (0x0020)
#define _MM_EXCEPT_MASK       (0x003f)

/* Exception mask bits (bits 7-12), one per exception flag above. */
#define _MM_MASK_INVALID      (0x0080)
#define _MM_MASK_DENORM       (0x0100)
#define _MM_MASK_DIV_ZERO     (0x0200)
#define _MM_MASK_OVERFLOW     (0x0400)
#define _MM_MASK_UNDERFLOW    (0x0800)
#define _MM_MASK_INEXACT      (0x1000)
#define _MM_MASK_MASK         (0x1f80)

/* Rounding mode field (bits 13-14). */
#define _MM_ROUND_NEAREST     (0x0000)
#define _MM_ROUND_DOWN        (0x2000)
#define _MM_ROUND_UP          (0x4000)
#define _MM_ROUND_TOWARD_ZERO (0x6000)
#define _MM_ROUND_MASK        (0x6000)

/* Flush-to-zero bit (bit 15). */
#define _MM_FLUSH_ZERO_MASK   (0x8000)
#define _MM_FLUSH_ZERO_ON     (0x8000)
#define _MM_FLUSH_ZERO_OFF    (0x0000)
9558d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
/*
 * Field readers: each returns the value of _mm_getcsr() with every bit
 * outside the named field cleared (the field is not shifted down).
 */
#define _MM_GET_EXCEPTION_MASK() (_mm_getcsr() & _MM_MASK_MASK)
#define _MM_GET_EXCEPTION_STATE() (_mm_getcsr() & _MM_EXCEPT_MASK)
#define _MM_GET_FLUSH_ZERO_MODE() (_mm_getcsr() & _MM_FLUSH_ZERO_MASK)
#define _MM_GET_ROUNDING_MODE() (_mm_getcsr() & _MM_ROUND_MASK)

/*
 * Field writers: each reads the current value with _mm_getcsr(), clears the
 * named field, ORs in x and writes the result back with _mm_setcsr().
 * x is not masked, so any bits it has outside the field are set as well.
 */
#define _MM_SET_EXCEPTION_MASK(x) (_mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | (x)))
#define _MM_SET_EXCEPTION_STATE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | (x)))
#define _MM_SET_FLUSH_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | (x)))
#define _MM_SET_ROUNDING_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (x)))
9658d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
/*
 * Transposes, in place, the 4x4 float matrix whose rows are row0..row3.
 *
 * The rows are first interleaved pairwise (row0 with row1, row2 with row3),
 * then the halves of those intermediates are recombined so that rowN ends
 * up holding element N of each original row.
 *
 * Every argument is read twice and assigned once, so each must be a
 * modifiable __m128 lvalue without side effects. The temporaries use
 * reserved (double-underscore) names so that they cannot shadow a caller
 * variable passed as an argument, e.g. one called tmp0.
 */
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
do { \
  __m128 __tmp3, __tmp2, __tmp1, __tmp0; \
  __tmp0 = _mm_unpacklo_ps((row0), (row1)); \
  __tmp2 = _mm_unpacklo_ps((row2), (row3)); \
  __tmp1 = _mm_unpackhi_ps((row0), (row1)); \
  __tmp3 = _mm_unpackhi_ps((row2), (row3)); \
  (row0) = _mm_movelh_ps(__tmp0, __tmp2); \
  (row1) = _mm_movehl_ps(__tmp2, __tmp0); \
  (row2) = _mm_movelh_ps(__tmp1, __tmp3); \
  (row3) = _mm_movehl_ps(__tmp3, __tmp1); \
} while (0)
9788d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
/* Aliases for compatibility: instruction-style _m_* names that map onto the
   corresponding _mm_* intrinsics. */
#define _m_pextrw _mm_extract_pi16
#define _m_pinsrw _mm_insert_pi16
#define _m_pmaxsw _mm_max_pi16
#define _m_pmaxub _mm_max_pu8
#define _m_pminsw _mm_min_pi16
#define _m_pminub _mm_min_pu8
#define _m_pmovmskb _mm_movemask_pi8
#define _m_pmulhuw _mm_mulhi_pu16
#define _m_pshufw _mm_shuffle_pi16
#define _m_maskmovq _mm_maskmove_si64
#define _m_pavgb _mm_avg_pu8
#define _m_pavgw _mm_avg_pu16
#define _m_psadbw _mm_sad_pu8
/* NOTE(review): this bare-prefix alias used to be defined twice in a row;
   the redundant copy is dropped. Its purpose is unclear - confirm whether
   anything relies on it before removing it altogether. */
#define _m_ _mm_
99504949598a23f501be6eec21697465fd46a28840aDmitry Shmidt
/* Ugly hack for backwards compatibility (matches gcc's behaviour): when SSE2
   is enabled, including this header also pulls in <emmintrin.h>. */
99704949598a23f501be6eec21697465fd46a28840aDmitry Shmidt#ifdef __SSE2__
998f21452aea786ac056eb01f1cbba4f553bd502747Dmitry Shmidt#include <emmintrin.h>
999f21452aea786ac056eb01f1cbba4f553bd502747Dmitry Shmidt#endif
1000f21452aea786ac056eb01f1cbba4f553bd502747Dmitry Shmidt
1001f21452aea786ac056eb01f1cbba4f553bd502747Dmitry Shmidt#endif /* __SSE__ */
1002f21452aea786ac056eb01f1cbba4f553bd502747Dmitry Shmidt
1003f21452aea786ac056eb01f1cbba4f553bd502747Dmitry Shmidt#endif /* __XMMINTRIN_H */
1004f21452aea786ac056eb01f1cbba4f553bd502747Dmitry Shmidt