/*===---- fmaintrin.h - FMA intrinsics -------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

247832adbbd72a1b784b7fb74a71a5d4085b0cb0d3Dmitry Shmidt#ifndef __IMMINTRIN_H
258d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
268d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#endif
278d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
288d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#ifndef __FMAINTRIN_H
298d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define __FMAINTRIN_H
308d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
318d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#ifndef __FMA__
328d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt# error "FMA instruction set is not enabled"
338d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#else
348d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
358d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
368d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
378d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
388d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C);
398d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
408d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
418d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
428d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
438d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
448d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C);
457832adbbd72a1b784b7fb74a71a5d4085b0cb0d3Dmitry Shmidt}
468d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
47051af73b8f8014eff33330aead0f36944b3403e6Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
488d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
498d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
508d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C);
518d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
5204949598a23f501be6eec21697465fd46a28840aDmitry Shmidt
5304949598a23f501be6eec21697465fd46a28840aDmitry Shmidtstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
548d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
558d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
568d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C);
578d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
588d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
598d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
60a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
61a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt{
62a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt  return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C);
638d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
648d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
658d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
667832adbbd72a1b784b7fb74a71a5d4085b0cb0d3Dmitry Shmidt_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
677832adbbd72a1b784b7fb74a71a5d4085b0cb0d3Dmitry Shmidt{
6861d9df3e62aaa0e87ad05452fcb95142159a17b6Dmitry Shmidt  return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C);
6961d9df3e62aaa0e87ad05452fcb95142159a17b6Dmitry Shmidt}
70bd14a57187b024f49f5b9ace55ef457d8d04650aDmitry Shmidt
71bd14a57187b024f49f5b9ace55ef457d8d04650aDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
728d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
738d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
741f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt  return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C);
751f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt}
768d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
778d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
781f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
791f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{
801f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt  return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C);
811f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt}
821f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
831f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
841f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
851f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{
861f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt  return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C);
871f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt}
881f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
891f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
9004949598a23f501be6eec21697465fd46a28840aDmitry Shmidt_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
918d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{
928d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C);
93}
94
95static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
96_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
97{
98  return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C);
99}
100
101static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
102_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
103{
104  return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C);
105}
106
107static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
108_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
109{
110  return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C);
111}
112
113static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
114_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
115{
116  return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C);
117}
118
119static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
120_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
121{
122  return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C);
123}
124
125static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
126_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
127{
128  return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C);
129}
130
131static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
132_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)
133{
134  return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C);
135}
136
137static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
138_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)
139{
140  return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C);
141}
142
143static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
144_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
145{
146  return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C);
147}
148
149static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
150_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
151{
152  return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C);
153}
154
155static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
156_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
157{
158  return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C);
159}
160
161static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
162_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
163{
164  return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C);
165}
166
167static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
168_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
169{
170  return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C);
171}
172
173static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
174_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
175{
176  return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C);
177}
178
179static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
180_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
181{
182  return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C);
183}
184
185static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
186_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
187{
188  return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C);
189}
190
191static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
192_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
193{
194  return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C);
195}
196
197static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
198_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
199{
200  return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C);
201}
202
203static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
204_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)
205{
206  return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C);
207}
208
209static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
210_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)
211{
212  return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C);
213}
214
215static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
216_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
217{
218  return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C);
219}
220
221static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
222_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
223{
224  return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C);
225}
226
227#endif /* __FMA__ */
228
229#endif /* __FMAINTRIN_H */
230