1/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24#ifndef __AMMINTRIN_H 25#define __AMMINTRIN_H 26 27#include <pmmintrin.h> 28 29/* Define the default attributes for the functions in this file. */ 30#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a"))) 31 32/// \brief Extracts the specified bits from the lower 64 bits of the 128-bit 33/// integer vector operand at the index idx and of the length len. 34/// 35/// \headerfile <x86intrin.h> 36/// 37/// \code 38/// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx); 39/// \endcode 40/// 41/// \code 42/// This intrinsic corresponds to the \c EXTRQ instruction. 43/// \endcode 44/// 45/// \param x 46/// The value from which bits are extracted. 
/// \param len
///    The length of the field, taken from bits [5:0]; the other bits are
///    ignored. If bits [5:0] are zero, the length is interpreted as 64.
/// \param idx
///    The index of the field's least significant bit, taken from bits [5:0];
///    the other bits are ignored. If the sum of the index and length is
///    greater than 64, the result is undefined. If the length and index are
///    both zero, bits [63:0] of parameter x are extracted. If the length is
///    zero but the index is non-zero, the result is undefined.
/// \returns A 128-bit integer vector whose lower 64 bits contain the bits
///    extracted from the source operand.
#define _mm_extracti_si64(x, len, idx)                  \
  ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \
                                  (char)(len),          \
                                  (char)(idx)))

/// \brief Extracts a bit field from the lower 64 bits of the 128-bit integer
///    vector operand; the index and the length of the field are both
///    specified by __y.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c EXTRQ instruction.
///
/// \param __x
///    The value from which bits are extracted.
/// \param __y
///    Specifies the index of the least significant bit at [13:8]
///    and the length at [5:0]; all other bits are ignored.
///    If bits [5:0] are zero, the length is interpreted as 64.
///    If the sum of the index and length is greater than 64, the result is
///    undefined. If the length and index are both zero, bits [63:0] of
///    parameter __x are extracted. If the length is zero but the index is
///    non-zero, the result is undefined.
/// \returns A 128-bit vector whose lower 64 bits contain the bits extracted
///    from the source operand.
83static __inline__ __m128i __DEFAULT_FN_ATTRS 84_mm_extract_si64(__m128i __x, __m128i __y) 85{ 86 return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y); 87} 88 89/// \brief Inserts bits of a specified length from the source integer vector 90/// y into the lower 64 bits of the destination integer vector x at the 91/// index idx and of the length len. 92/// 93/// \headerfile <x86intrin.h> 94/// 95/// \code 96/// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len, 97/// const int idx); 98/// \endcode 99/// 100/// \code 101/// This intrinsic corresponds to the \c INSERTQ instruction. 102/// \endcode 103/// 104/// \param x 105/// The destination operand where bits will be inserted. The inserted bits 106/// are defined by the length len and by the index idx specifying the least 107/// significant bit. 108/// \param y 109/// The source operand containing the bits to be extracted. The extracted 110/// bits are the least significant bits of operand y of length len. 111/// \param len 112/// Bits [5:0] specify the length; the other bits are ignored. If bits [5:0] 113/// are zero, the length is interpreted as 64. 114/// \param idx 115/// Bits [5:0] specify the index of the least significant bit; the other 116/// bits are ignored. If the sum of the index and length is greater than 117/// 64, the result is undefined. If the length and index are both zero, 118/// bits [63:0] of parameter y are inserted into parameter x. If the 119/// length is zero but the index is non-zero, the result is undefined. 120/// \returns A 128-bit integer vector containing the original lower 64-bits 121/// of destination operand x with the specified bitfields replaced by the 122/// lower bits of source operand y. The upper 64 bits of the return value 123/// are undefined. 
124 125#define _mm_inserti_si64(x, y, len, idx) \ 126 ((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \ 127 (__v2di)(__m128i)(y), \ 128 (char)(len), (char)(idx))) 129 130/// \brief Inserts bits of a specified length from the source integer vector 131/// __y into the lower 64 bits of the destination integer vector __x at 132/// the index and of the length specified by __y. 133/// 134/// \headerfile <x86intrin.h> 135/// 136/// \code 137/// This intrinsic corresponds to the \c INSERTQ instruction. 138/// \endcode 139/// 140/// \param __x 141/// The destination operand where bits will be inserted. The inserted bits 142/// are defined by the length and by the index of the least significant bit 143/// specified by operand __y. 144/// \param __y 145/// The source operand containing the bits to be extracted. The extracted 146/// bits are the least significant bits of operand __y with length specified 147/// by bits [69:64]. These are inserted into the destination at the index 148/// specified by bits [77:72]; all other bits are ignored. 149/// If bits [69:64] are zero, the length is interpreted as 64. 150/// If the sum of the index and length is greater than 64, the result is 151/// undefined. If the length and index are both zero, bits [63:0] of 152/// parameter __y are inserted into parameter __x. If the length 153/// is zero but the index is non-zero, the result is undefined. 154/// \returns A 128-bit integer vector containing the original lower 64-bits 155/// of destination operand __x with the specified bitfields replaced by the 156/// lower bits of source operand __y. The upper 64 bits of the return value 157/// are undefined. 158 159static __inline__ __m128i __DEFAULT_FN_ATTRS 160_mm_insert_si64(__m128i __x, __m128i __y) 161{ 162 return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y); 163} 164 165/// \brief Stores a 64-bit double-precision value in a 64-bit memory location. 
166/// To minimize caching, the data is flagged as non-temporal (unlikely to be 167/// used again soon). 168/// 169/// \headerfile <x86intrin.h> 170/// 171/// \code 172/// This intrinsic corresponds to the \c MOVNTSD instruction. 173/// \endcode 174/// 175/// \param __p 176/// The 64-bit memory location used to store the register value. 177/// \param __a 178/// The 64-bit double-precision floating-point register value to 179/// be stored. 180static __inline__ void __DEFAULT_FN_ATTRS 181_mm_stream_sd(double *__p, __m128d __a) 182{ 183 __builtin_ia32_movntsd(__p, (__v2df)__a); 184} 185 186/// \brief Stores a 32-bit single-precision floating-point value in a 32-bit 187/// memory location. To minimize caching, the data is flagged as 188/// non-temporal (unlikely to be used again soon). 189/// 190/// \headerfile <x86intrin.h> 191/// 192/// \code 193/// This intrinsic corresponds to the \c MOVNTSS instruction. 194/// \endcode 195/// 196/// \param __p 197/// The 32-bit memory location used to store the register value. 198/// \param __a 199/// The 32-bit single-precision floating-point register value to 200/// be stored. 201static __inline__ void __DEFAULT_FN_ATTRS 202_mm_stream_ss(float *__p, __m128 __a) 203{ 204 __builtin_ia32_movntss(__p, (__v4sf)__a); 205} 206 207#undef __DEFAULT_FN_ATTRS 208 209#endif /* __AMMINTRIN_H */ 210