1/*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24#ifndef __IMMINTRIN_H 25#error "Never use <fmaintrin.h> directly; include <immintrin.h> instead." 26#endif 27 28#ifndef __FMAINTRIN_H 29#define __FMAINTRIN_H 30 31/* Define the default attributes for the functions in this file. */ 32#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma"))) 33 34static __inline__ __m128 __DEFAULT_FN_ATTRS 35_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) 36{ 37 return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 38} 39 40static __inline__ __m128d __DEFAULT_FN_ATTRS 41_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) 42{ 43 return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); 44} 45 46static __inline__ __m128 __DEFAULT_FN_ATTRS 47_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) 48{ 49 return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 50} 51 52static __inline__ __m128d __DEFAULT_FN_ATTRS 53_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) 54{ 55 return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C); 56} 57 58static __inline__ __m128 __DEFAULT_FN_ATTRS 59_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) 60{ 61 return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 62} 63 64static __inline__ __m128d __DEFAULT_FN_ATTRS 65_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) 66{ 67 return (__m128d)__builtin_ia32_vfmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); 68} 69 70static __inline__ __m128 __DEFAULT_FN_ATTRS 71_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) 72{ 73 return (__m128)__builtin_ia32_vfmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 74} 75 76static __inline__ __m128d __DEFAULT_FN_ATTRS 77_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) 78{ 79 return (__m128d)__builtin_ia32_vfmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C); 80} 81 82static __inline__ __m128 __DEFAULT_FN_ATTRS 83_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) 84{ 85 return (__m128)__builtin_ia32_vfnmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 86} 87 88static __inline__ __m128d __DEFAULT_FN_ATTRS 89_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) 90{ 91 return (__m128d)__builtin_ia32_vfnmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); 92} 93 94static __inline__ __m128 __DEFAULT_FN_ATTRS 95_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) 96{ 97 return (__m128)__builtin_ia32_vfnmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 98} 99 100static __inline__ __m128d __DEFAULT_FN_ATTRS 101_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) 102{ 103 return (__m128d)__builtin_ia32_vfnmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C); 104} 105 106static __inline__ __m128 __DEFAULT_FN_ATTRS 107_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) 108{ 109 return (__m128)__builtin_ia32_vfnmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 110} 111 112static __inline__ __m128d __DEFAULT_FN_ATTRS 113_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) 114{ 115 return (__m128d)__builtin_ia32_vfnmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); 116} 117 118static __inline__ __m128 __DEFAULT_FN_ATTRS 119_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) 120{ 121 return (__m128)__builtin_ia32_vfnmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 122} 123 124static __inline__ __m128d __DEFAULT_FN_ATTRS 125_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) 126{ 127 return (__m128d)__builtin_ia32_vfnmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C); 128} 129 130static __inline__ __m128 __DEFAULT_FN_ATTRS 131_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) 132{ 133 return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 134} 135 136static __inline__ __m128d __DEFAULT_FN_ATTRS 137_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) 138{ 139 return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); 140} 141 142static __inline__ __m128 __DEFAULT_FN_ATTRS 143_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) 144{ 145 return (__m128)__builtin_ia32_vfmsubaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 146} 147 148static __inline__ __m128d __DEFAULT_FN_ATTRS 149_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) 150{ 151 return (__m128d)__builtin_ia32_vfmsubaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); 152} 153 154static __inline__ __m256 __DEFAULT_FN_ATTRS 155_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) 156{ 157 return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); 158} 159 160static __inline__ __m256d __DEFAULT_FN_ATTRS 161_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) 162{ 163 return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); 164} 165 166static __inline__ __m256 __DEFAULT_FN_ATTRS 167_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) 168{ 169 return (__m256)__builtin_ia32_vfmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); 170} 171 172static __inline__ __m256d __DEFAULT_FN_ATTRS 173_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) 174{ 175 return (__m256d)__builtin_ia32_vfmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); 176} 177 178static __inline__ __m256 __DEFAULT_FN_ATTRS 179_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) 180{ 181 return (__m256)__builtin_ia32_vfnmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); 182} 183 184static __inline__ __m256d __DEFAULT_FN_ATTRS 185_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) 186{ 187 return (__m256d)__builtin_ia32_vfnmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); 188} 189 190static __inline__ __m256 __DEFAULT_FN_ATTRS 191_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) 192{ 193 return (__m256)__builtin_ia32_vfnmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); 194} 195 196static __inline__ __m256d __DEFAULT_FN_ATTRS 197_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) 198{ 199 return (__m256d)__builtin_ia32_vfnmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); 200} 201 202static __inline__ __m256 __DEFAULT_FN_ATTRS 203_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C) 204{ 205 return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); 206} 207 208static __inline__ __m256d __DEFAULT_FN_ATTRS 209_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) 210{ 211 return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); 212} 213 214static __inline__ __m256 __DEFAULT_FN_ATTRS 215_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) 216{ 217 return (__m256)__builtin_ia32_vfmsubaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); 218} 219 220static __inline__ __m256d __DEFAULT_FN_ATTRS 221_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) 222{ 223 return (__m256d)__builtin_ia32_vfmsubaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); 224} 225 226#undef __DEFAULT_FN_ATTRS 227 228#endif /* __FMAINTRIN_H */ 229