18d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------=== 28d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * 38d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * Permission is hereby granted, free of charge, to any person obtaining a copy 48d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * of this software and associated documentation files (the "Software"), to deal 5c5ec7f57ead87efa365800228aa0b09a12d9e6c4Dmitry Shmidt * in the Software without restriction, including without limitation the rights 6c5ec7f57ead87efa365800228aa0b09a12d9e6c4Dmitry Shmidt * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 78d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * copies of the Software, and to permit persons to whom the Software is 88d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * furnished to do so, subject to the following conditions: 98d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * 108d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * The above copyright notice and this permission notice shall be included in 118d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * all copies or substantial portions of the Software. 128d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * 138d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 148d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 158d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 168d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 177832adbbd72a1b784b7fb74a71a5d4085b0cb0d3Dmitry Shmidt * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 188d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19fb79edc9df1f20461e90e478363d207348213d35Dmitry Shmidt * THE SOFTWARE. 20fb79edc9df1f20461e90e478363d207348213d35Dmitry Shmidt * 218d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt *===-----------------------------------------------------------------------=== 228d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt */ 237832adbbd72a1b784b7fb74a71a5d4085b0cb0d3Dmitry Shmidt 247832adbbd72a1b784b7fb74a71a5d4085b0cb0d3Dmitry Shmidt#ifndef __IMMINTRIN_H 258d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#error "Never use <fmaintrin.h> directly; include <immintrin.h> instead." 268d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#endif 278d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 288d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#ifndef __FMAINTRIN_H 298d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define __FMAINTRIN_H 308d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 318d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#ifndef __FMA__ 328d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt# error "FMA instruction set is not enabled" 338d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#else 348d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 358d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 368d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) 378d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 388d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C); 398d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 408d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 418d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 428d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) 438d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 448d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C); 457832adbbd72a1b784b7fb74a71a5d4085b0cb0d3Dmitry Shmidt} 468d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 47051af73b8f8014eff33330aead0f36944b3403e6Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 488d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) 498d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 508d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C); 518d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 5204949598a23f501be6eec21697465fd46a28840aDmitry Shmidt 5304949598a23f501be6eec21697465fd46a28840aDmitry Shmidtstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 548d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) 558d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 568d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C); 578d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 588d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 598d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 60a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) 61a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt{ 62a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C); 638d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 648d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 658d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 667832adbbd72a1b784b7fb74a71a5d4085b0cb0d3Dmitry Shmidt_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) 677832adbbd72a1b784b7fb74a71a5d4085b0cb0d3Dmitry Shmidt{ 6861d9df3e62aaa0e87ad05452fcb95142159a17b6Dmitry Shmidt return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C); 6961d9df3e62aaa0e87ad05452fcb95142159a17b6Dmitry Shmidt} 70bd14a57187b024f49f5b9ace55ef457d8d04650aDmitry Shmidt 71bd14a57187b024f49f5b9ace55ef457d8d04650aDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 728d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) 738d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 741f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C); 751f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt} 768d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 778d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 781f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) 791f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{ 801f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C); 811f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt} 821f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt 831f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 841f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) 851f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{ 861f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C); 871f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt} 881f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt 891f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 9004949598a23f501be6eec21697465fd46a28840aDmitry Shmidt_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) 918d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 928d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C); 93} 94 95static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 96_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) 97{ 98 return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C); 99} 100 101static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 102_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) 103{ 104 return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C); 105} 106 107static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 108_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) 109{ 110 return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C); 111} 112 113static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 114_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) 115{ 116 return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C); 117} 118 119static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 120_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) 121{ 122 return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C); 123} 124 125static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 126_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) 127{ 128 return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C); 129} 130 131static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 132_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) 133{ 134 return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C); 135} 136 137static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 138_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) 139{ 140 return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C); 141} 142 143static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 144_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) 145{ 146 return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C); 147} 148 149static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 150_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) 151{ 152 return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C); 153} 154 155static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 156_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) 157{ 158 return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C); 159} 160 161static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) 162_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) 163{ 164 return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C); 165} 166 167static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 168_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) 169{ 170 return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C); 171} 172 173static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) 174_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) 175{ 176 return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C); 177} 178 179static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 180_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) 181{ 182 return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C); 183} 184 185static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) 186_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) 187{ 188 return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C); 189} 190 191static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 192_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) 193{ 194 return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C); 195} 196 197static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) 198_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) 199{ 200 return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C); 201} 202 203static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 204_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C) 205{ 206 return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C); 207} 208 209static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) 210_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) 211{ 212 return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C); 213} 214 215static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 216_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) 217{ 218 return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C); 219} 220 221static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) 222_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) 223{ 224 return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C); 225} 226 227#endif /* __FMA__ */ 228 229#endif /* __FMAINTRIN_H */ 230