1/*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23#ifndef __CLANG_CUDA_CMATH_H__
24#define __CLANG_CUDA_CMATH_H__
25#ifndef __CUDA__
26#error "This file is for CUDA compilation only."
27#endif
28
29#include <limits>
30
31// CUDA lets us use various std math functions on the device side.  This file
32// works in concert with __clang_cuda_math_forward_declares.h to make this work.
33//
34// Specifically, the forward-declares header declares __device__ overloads for
35// these functions in the global namespace, then pulls them into namespace std
36// with 'using' statements.  Then this file implements those functions, after
37// their implementations have been pulled in.
38//
39// It's important that we declare the functions in the global namespace and pull
40// them into namespace std with using statements, as opposed to simply declaring
41// these functions in namespace std, because our device functions need to
42// overload the standard library functions, which may be declared in the global
43// namespace or in std, depending on the degree of conformance of the stdlib
44// implementation.  Declaring in the global namespace and pulling into namespace
45// std covers all of the known knowns.
46
47#define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
48
49__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
50__DEVICE__ long abs(long __n) { return ::labs(__n); }
51__DEVICE__ float abs(float __x) { return ::fabsf(__x); }
52__DEVICE__ double abs(double __x) { return ::fabs(__x); }
53__DEVICE__ float acos(float __x) { return ::acosf(__x); }
54__DEVICE__ float asin(float __x) { return ::asinf(__x); }
55__DEVICE__ float atan(float __x) { return ::atanf(__x); }
56__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
57__DEVICE__ float ceil(float __x) { return ::ceilf(__x); }
58__DEVICE__ float cos(float __x) { return ::cosf(__x); }
59__DEVICE__ float cosh(float __x) { return ::coshf(__x); }
60__DEVICE__ float exp(float __x) { return ::expf(__x); }
61__DEVICE__ float fabs(float __x) { return ::fabsf(__x); }
62__DEVICE__ float floor(float __x) { return ::floorf(__x); }
63__DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
64__DEVICE__ int fpclassify(float __x) {
65  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
66                              FP_ZERO, __x);
67}
68__DEVICE__ int fpclassify(double __x) {
69  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
70                              FP_ZERO, __x);
71}
72__DEVICE__ float frexp(float __arg, int *__exp) {
73  return ::frexpf(__arg, __exp);
74}
75
76// For inscrutable reasons, the CUDA headers define these functions for us on
77// Windows.
78#ifndef _MSC_VER
79__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
80__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
81__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
82// For inscrutable reasons, __finite(), the double-precision version of
83// __finitef, does not exist when compiling for MacOS.  __isfinited is available
84// everywhere and is just as good.
85__DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
86__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
87__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
88#endif
89
90__DEVICE__ bool isgreater(float __x, float __y) {
91  return __builtin_isgreater(__x, __y);
92}
93__DEVICE__ bool isgreater(double __x, double __y) {
94  return __builtin_isgreater(__x, __y);
95}
96__DEVICE__ bool isgreaterequal(float __x, float __y) {
97  return __builtin_isgreaterequal(__x, __y);
98}
99__DEVICE__ bool isgreaterequal(double __x, double __y) {
100  return __builtin_isgreaterequal(__x, __y);
101}
102__DEVICE__ bool isless(float __x, float __y) {
103  return __builtin_isless(__x, __y);
104}
105__DEVICE__ bool isless(double __x, double __y) {
106  return __builtin_isless(__x, __y);
107}
108__DEVICE__ bool islessequal(float __x, float __y) {
109  return __builtin_islessequal(__x, __y);
110}
111__DEVICE__ bool islessequal(double __x, double __y) {
112  return __builtin_islessequal(__x, __y);
113}
114__DEVICE__ bool islessgreater(float __x, float __y) {
115  return __builtin_islessgreater(__x, __y);
116}
117__DEVICE__ bool islessgreater(double __x, double __y) {
118  return __builtin_islessgreater(__x, __y);
119}
120__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
121__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
122__DEVICE__ bool isunordered(float __x, float __y) {
123  return __builtin_isunordered(__x, __y);
124}
125__DEVICE__ bool isunordered(double __x, double __y) {
126  return __builtin_isunordered(__x, __y);
127}
128__DEVICE__ float ldexp(float __arg, int __exp) {
129  return ::ldexpf(__arg, __exp);
130}
131__DEVICE__ float log(float __x) { return ::logf(__x); }
132__DEVICE__ float log10(float __x) { return ::log10f(__x); }
133__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
134__DEVICE__ float nexttoward(float __from, double __to) {
135  return __builtin_nexttowardf(__from, __to);
136}
137__DEVICE__ double nexttoward(double __from, double __to) {
138  return __builtin_nexttoward(__from, __to);
139}
140__DEVICE__ float nexttowardf(float __from, double __to) {
141  return __builtin_nexttowardf(__from, __to);
142}
143__DEVICE__ float pow(float __base, float __exp) {
144  return ::powf(__base, __exp);
145}
146__DEVICE__ float pow(float __base, int __iexp) {
147  return ::powif(__base, __iexp);
148}
149__DEVICE__ double pow(double __base, int __iexp) {
150  return ::powi(__base, __iexp);
151}
152__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
153__DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }
154__DEVICE__ float sin(float __x) { return ::sinf(__x); }
155__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
156__DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
157__DEVICE__ float tan(float __x) { return ::tanf(__x); }
158__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
159
160// Now we've defined everything we promised we'd define in
161// __clang_cuda_math_forward_declares.h.  We need to do two additional things to
162// fix up our math functions.
163//
164// 1) Define __device__ overloads for e.g. sin(int).  The CUDA headers define
165//    only sin(float) and sin(double), which means that e.g. sin(0) is
166//    ambiguous.
167//
168// 2) Pull the __device__ overloads of "foobarf" math functions into namespace
169//    std.  These are defined in the CUDA headers in the global namespace,
170//    independent of everything else we've done here.
171
172// We can't use std::enable_if, because we want to be pre-C++11 compatible.  But
173// we go ahead and unconditionally define functions that are only available when
174// compiling for C++11 to match the behavior of the CUDA headers.
175template<bool __B, class __T = void>
176struct __clang_cuda_enable_if {};
177
178template <class __T> struct __clang_cuda_enable_if<true, __T> {
179  typedef __T type;
180};
181
182// Defines an overload of __fn that accepts one integral argument, calls
183// __fn((double)x), and returns __retty.
184#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn)                      \
185  template <typename __T>                                                      \
186  __DEVICE__                                                                   \
187      typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,    \
188                                      __retty>::type                           \
189      __fn(__T __x) {                                                          \
190    return ::__fn((double)__x);                                                \
191  }
192
193// Defines an overload of __fn that accepts one two arithmetic arguments, calls
194// __fn((double)x, (double)y), and returns a double.
195//
196// Note this is different from OVERLOAD_1, which generates an overload that
197// accepts only *integral* arguments.
198#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn)                      \
199  template <typename __T1, typename __T2>                                      \
200  __DEVICE__ typename __clang_cuda_enable_if<                                  \
201      std::numeric_limits<__T1>::is_specialized &&                             \
202          std::numeric_limits<__T2>::is_specialized,                           \
203      __retty>::type                                                           \
204  __fn(__T1 __x, __T2 __y) {                                                   \
205    return __fn((double)__x, (double)__y);                                     \
206  }
207
208__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)
209__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)
210__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)
211__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)
212__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)
213__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);
214__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)
215__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)
216__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)
217__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);
218__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)
219__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)
220__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)
221__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)
222__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)
223__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)
224__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)
225__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)
226__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);
227__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)
228__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);
229__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);
230__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);
231__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)
232__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);
233__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)
234__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)
235__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);
236__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);
237__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);
238__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);
239__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);
240__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);
241__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);
242__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)
243__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);
244__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)
245__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)
246__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)
247__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)
248__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)
249__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)
250__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)
251__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)
252__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)
253__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)
254__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);
255__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);
256__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);
257__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);
258__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);
259__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);
260__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)
261__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)
262__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)
263__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)
264__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)
265__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)
266__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)
267__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);
268
269#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1
270#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2
271
272// Overloads for functions that don't match the patterns expected by
273// __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.
274template <typename __T1, typename __T2, typename __T3>
275__DEVICE__ typename __clang_cuda_enable_if<
276    std::numeric_limits<__T1>::is_specialized &&
277        std::numeric_limits<__T2>::is_specialized &&
278        std::numeric_limits<__T3>::is_specialized,
279    double>::type
280fma(__T1 __x, __T2 __y, __T3 __z) {
281  return std::fma((double)__x, (double)__y, (double)__z);
282}
283
284template <typename __T>
285__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
286                                           double>::type
287frexp(__T __x, int *__exp) {
288  return std::frexp((double)__x, __exp);
289}
290
291template <typename __T>
292__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
293                                           double>::type
294ldexp(__T __x, int __exp) {
295  return std::ldexp((double)__x, __exp);
296}
297
298template <typename __T>
299__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
300                                           double>::type
301nexttoward(__T __from, double __to) {
302  return std::nexttoward((double)__from, __to);
303}
304
305template <typename __T1, typename __T2>
306__DEVICE__ typename __clang_cuda_enable_if<
307    std::numeric_limits<__T1>::is_specialized &&
308        std::numeric_limits<__T2>::is_specialized,
309    double>::type
310remquo(__T1 __x, __T2 __y, int *__quo) {
311  return std::remquo((double)__x, (double)__y, __quo);
312}
313
314template <typename __T>
315__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
316                                           double>::type
317scalbln(__T __x, long __exp) {
318  return std::scalbln((double)__x, __exp);
319}
320
321template <typename __T>
322__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
323                                           double>::type
324scalbn(__T __x, int __exp) {
325  return std::scalbn((double)__x, __exp);
326}
327
328// We need to define these overloads in exactly the namespace our standard
329// library uses (including the right inline namespace), otherwise they won't be
330// picked up by other functions in the standard library (e.g. functions in
331// <complex>).  Thus the ugliness below.
332#ifdef _LIBCPP_BEGIN_NAMESPACE_STD
333_LIBCPP_BEGIN_NAMESPACE_STD
334#else
335namespace std {
336#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
337_GLIBCXX_BEGIN_NAMESPACE_VERSION
338#endif
339#endif
340
341// Pull the new overloads we defined above into namespace std.
342using ::acos;
343using ::acosh;
344using ::asin;
345using ::asinh;
346using ::atan;
347using ::atan2;
348using ::atanh;
349using ::cbrt;
350using ::ceil;
351using ::copysign;
352using ::cos;
353using ::cosh;
354using ::erf;
355using ::erfc;
356using ::exp;
357using ::exp2;
358using ::expm1;
359using ::fabs;
360using ::fdim;
361using ::floor;
362using ::fma;
363using ::fmax;
364using ::fmin;
365using ::fmod;
366using ::fpclassify;
367using ::frexp;
368using ::hypot;
369using ::ilogb;
370using ::isfinite;
371using ::isgreater;
372using ::isgreaterequal;
373using ::isless;
374using ::islessequal;
375using ::islessgreater;
376using ::isnormal;
377using ::isunordered;
378using ::ldexp;
379using ::lgamma;
380using ::llrint;
381using ::llround;
382using ::log;
383using ::log10;
384using ::log1p;
385using ::log2;
386using ::logb;
387using ::lrint;
388using ::lround;
389using ::nearbyint;
390using ::nextafter;
391using ::nexttoward;
392using ::pow;
393using ::remainder;
394using ::remquo;
395using ::rint;
396using ::round;
397using ::scalbln;
398using ::scalbn;
399using ::signbit;
400using ::sin;
401using ::sinh;
402using ::sqrt;
403using ::tan;
404using ::tanh;
405using ::tgamma;
406using ::trunc;
407
408// Well this is fun: We need to pull these symbols in for libc++, but we can't
409// pull them in with libstdc++, because its ::isinf and ::isnan are different
410// than its std::isinf and std::isnan.
411#ifndef __GLIBCXX__
412using ::isinf;
413using ::isnan;
414#endif
415
416// Finally, pull the "foobarf" functions that CUDA defines in its headers into
417// namespace std.
418using ::acosf;
419using ::acoshf;
420using ::asinf;
421using ::asinhf;
422using ::atan2f;
423using ::atanf;
424using ::atanhf;
425using ::cbrtf;
426using ::ceilf;
427using ::copysignf;
428using ::cosf;
429using ::coshf;
430using ::erfcf;
431using ::erff;
432using ::exp2f;
433using ::expf;
434using ::expm1f;
435using ::fabsf;
436using ::fdimf;
437using ::floorf;
438using ::fmaf;
439using ::fmaxf;
440using ::fminf;
441using ::fmodf;
442using ::frexpf;
443using ::hypotf;
444using ::ilogbf;
445using ::ldexpf;
446using ::lgammaf;
447using ::llrintf;
448using ::llroundf;
449using ::log10f;
450using ::log1pf;
451using ::log2f;
452using ::logbf;
453using ::logf;
454using ::lrintf;
455using ::lroundf;
456using ::modff;
457using ::nearbyintf;
458using ::nextafterf;
459using ::nexttowardf;
460using ::nexttowardf;
461using ::powf;
462using ::remainderf;
463using ::remquof;
464using ::rintf;
465using ::roundf;
466using ::scalblnf;
467using ::scalbnf;
468using ::sinf;
469using ::sinhf;
470using ::sqrtf;
471using ::tanf;
472using ::tanhf;
473using ::tgammaf;
474using ::truncf;
475
476#ifdef _LIBCPP_END_NAMESPACE_STD
477_LIBCPP_END_NAMESPACE_STD
478#else
479#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
480_GLIBCXX_END_NAMESPACE_VERSION
481#endif
482} // namespace std
483#endif
484
485#undef __DEVICE__
486
487#endif
488