1/*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23#ifndef __CLANG_CUDA_CMATH_H__
24#define __CLANG_CUDA_CMATH_H__
25#ifndef __CUDA__
26#error "This file is for CUDA compilation only."
27#endif
28
29#include <limits>
30
31// CUDA lets us use various std math functions on the device side.  This file
32// works in concert with __clang_cuda_math_forward_declares.h to make this work.
33//
34// Specifically, the forward-declares header declares __device__ overloads for
35// these functions in the global namespace, then pulls them into namespace std
36// with 'using' statements.  Then this file implements those functions, after
37// their implementations have been pulled in.
38//
39// It's important that we declare the functions in the global namespace and pull
40// them into namespace std with using statements, as opposed to simply declaring
41// these functions in namespace std, because our device functions need to
42// overload the standard library functions, which may be declared in the global
43// namespace or in std, depending on the degree of conformance of the stdlib
44// implementation.  Declaring in the global namespace and pulling into namespace
45// std covers all of the known knowns.
46
47#define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
48
49__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
50__DEVICE__ long abs(long __n) { return ::labs(__n); }
51__DEVICE__ float abs(float __x) { return ::fabsf(__x); }
52__DEVICE__ double abs(double __x) { return ::fabs(__x); }
53__DEVICE__ float acos(float __x) { return ::acosf(__x); }
54__DEVICE__ float asin(float __x) { return ::asinf(__x); }
55__DEVICE__ float atan(float __x) { return ::atanf(__x); }
56__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
57__DEVICE__ float ceil(float __x) { return ::ceilf(__x); }
58__DEVICE__ float cos(float __x) { return ::cosf(__x); }
59__DEVICE__ float cosh(float __x) { return ::coshf(__x); }
60__DEVICE__ float exp(float __x) { return ::expf(__x); }
61__DEVICE__ float fabs(float __x) { return ::fabsf(__x); }
62__DEVICE__ float floor(float __x) { return ::floorf(__x); }
63__DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
64__DEVICE__ int fpclassify(float __x) {
65  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
66                              FP_ZERO, __x);
67}
68__DEVICE__ int fpclassify(double __x) {
69  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
70                              FP_ZERO, __x);
71}
72__DEVICE__ float frexp(float __arg, int *__exp) {
73  return ::frexpf(__arg, __exp);
74}
75__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
76__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
77__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
78__DEVICE__ bool isfinite(double __x) { return ::__finite(__x); }
79__DEVICE__ bool isgreater(float __x, float __y) {
80  return __builtin_isgreater(__x, __y);
81}
82__DEVICE__ bool isgreater(double __x, double __y) {
83  return __builtin_isgreater(__x, __y);
84}
85__DEVICE__ bool isgreaterequal(float __x, float __y) {
86  return __builtin_isgreaterequal(__x, __y);
87}
88__DEVICE__ bool isgreaterequal(double __x, double __y) {
89  return __builtin_isgreaterequal(__x, __y);
90}
91__DEVICE__ bool isless(float __x, float __y) {
92  return __builtin_isless(__x, __y);
93}
94__DEVICE__ bool isless(double __x, double __y) {
95  return __builtin_isless(__x, __y);
96}
97__DEVICE__ bool islessequal(float __x, float __y) {
98  return __builtin_islessequal(__x, __y);
99}
100__DEVICE__ bool islessequal(double __x, double __y) {
101  return __builtin_islessequal(__x, __y);
102}
103__DEVICE__ bool islessgreater(float __x, float __y) {
104  return __builtin_islessgreater(__x, __y);
105}
106__DEVICE__ bool islessgreater(double __x, double __y) {
107  return __builtin_islessgreater(__x, __y);
108}
109__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
110__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
111__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
112__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
113__DEVICE__ bool isunordered(float __x, float __y) {
114  return __builtin_isunordered(__x, __y);
115}
116__DEVICE__ bool isunordered(double __x, double __y) {
117  return __builtin_isunordered(__x, __y);
118}
119__DEVICE__ float ldexp(float __arg, int __exp) {
120  return ::ldexpf(__arg, __exp);
121}
122__DEVICE__ float log(float __x) { return ::logf(__x); }
123__DEVICE__ float log10(float __x) { return ::log10f(__x); }
124__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
125__DEVICE__ float nexttoward(float __from, double __to) {
126  return __builtin_nexttowardf(__from, __to);
127}
128__DEVICE__ double nexttoward(double __from, double __to) {
129  return __builtin_nexttoward(__from, __to);
130}
131__DEVICE__ float nexttowardf(float __from, double __to) {
132  return __builtin_nexttowardf(__from, __to);
133}
134__DEVICE__ float pow(float __base, float __exp) {
135  return ::powf(__base, __exp);
136}
137__DEVICE__ float pow(float __base, int __iexp) {
138  return ::powif(__base, __iexp);
139}
140__DEVICE__ double pow(double __base, int __iexp) {
141  return ::powi(__base, __iexp);
142}
143__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
144__DEVICE__ bool signbit(double __x) { return ::__signbit(__x); }
145__DEVICE__ float sin(float __x) { return ::sinf(__x); }
146__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
147__DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
148__DEVICE__ float tan(float __x) { return ::tanf(__x); }
149__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
150
151// Now we've defined everything we promised we'd define in
152// __clang_cuda_math_forward_declares.h.  We need to do two additional things to
153// fix up our math functions.
154//
155// 1) Define __device__ overloads for e.g. sin(int).  The CUDA headers define
156//    only sin(float) and sin(double), which means that e.g. sin(0) is
157//    ambiguous.
158//
159// 2) Pull the __device__ overloads of "foobarf" math functions into namespace
160//    std.  These are defined in the CUDA headers in the global namespace,
161//    independent of everything else we've done here.
162
163// We can't use std::enable_if, because we want to be pre-C++11 compatible.  But
164// we go ahead and unconditionally define functions that are only available when
165// compiling for C++11 to match the behavior of the CUDA headers.
166template<bool __B, class __T = void>
167struct __clang_cuda_enable_if {};
168
169template <class __T> struct __clang_cuda_enable_if<true, __T> {
170  typedef __T type;
171};
172
173// Defines an overload of __fn that accepts one integral argument, calls
174// __fn((double)x), and returns __retty.
175#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn)                      \
176  template <typename __T>                                                      \
177  __DEVICE__                                                                   \
178      typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,    \
179                                      __retty>::type                           \
180      __fn(__T __x) {                                                          \
181    return ::__fn((double)__x);                                                \
182  }
183
184// Defines an overload of __fn that accepts one two arithmetic arguments, calls
185// __fn((double)x, (double)y), and returns a double.
186//
187// Note this is different from OVERLOAD_1, which generates an overload that
188// accepts only *integral* arguments.
189#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn)                      \
190  template <typename __T1, typename __T2>                                      \
191  __DEVICE__ typename __clang_cuda_enable_if<                                  \
192      std::numeric_limits<__T1>::is_specialized &&                             \
193          std::numeric_limits<__T2>::is_specialized,                           \
194      __retty>::type                                                           \
195  __fn(__T1 __x, __T2 __y) {                                                   \
196    return __fn((double)__x, (double)__y);                                     \
197  }
198
199__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)
200__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)
201__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)
202__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)
203__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)
204__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);
205__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)
206__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)
207__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)
208__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);
209__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)
210__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)
211__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)
212__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)
213__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)
214__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)
215__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)
216__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)
217__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);
218__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)
219__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);
220__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);
221__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);
222__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)
223__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);
224__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)
225__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)
226__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);
227__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);
228__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);
229__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);
230__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);
231__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);
232__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);
233__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)
234__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);
235__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)
236__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)
237__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)
238__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)
239__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)
240__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)
241__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)
242__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)
243__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)
244__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)
245__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);
246__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);
247__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);
248__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);
249__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);
250__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);
251__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)
252__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)
253__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)
254__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)
255__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)
256__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)
257__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)
258__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);
259
260#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1
261#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2
262
263// Overloads for functions that don't match the patterns expected by
264// __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.
265template <typename __T1, typename __T2, typename __T3>
266__DEVICE__ typename __clang_cuda_enable_if<
267    std::numeric_limits<__T1>::is_specialized &&
268        std::numeric_limits<__T2>::is_specialized &&
269        std::numeric_limits<__T3>::is_specialized,
270    double>::type
271fma(__T1 __x, __T2 __y, __T3 __z) {
272  return std::fma((double)__x, (double)__y, (double)__z);
273}
274
275template <typename __T>
276__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
277                                           double>::type
278frexp(__T __x, int *__exp) {
279  return std::frexp((double)__x, __exp);
280}
281
282template <typename __T>
283__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
284                                           double>::type
285ldexp(__T __x, int __exp) {
286  return std::ldexp((double)__x, __exp);
287}
288
289template <typename __T>
290__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
291                                           double>::type
292nexttoward(__T __from, double __to) {
293  return std::nexttoward((double)__from, __to);
294}
295
296template <typename __T1, typename __T2>
297__DEVICE__ typename __clang_cuda_enable_if<
298    std::numeric_limits<__T1>::is_specialized &&
299        std::numeric_limits<__T2>::is_specialized,
300    double>::type
301remquo(__T1 __x, __T2 __y, int *__quo) {
302  return std::remquo((double)__x, (double)__y, __quo);
303}
304
305template <typename __T>
306__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
307                                           double>::type
308scalbln(__T __x, long __exp) {
309  return std::scalbln((double)__x, __exp);
310}
311
312template <typename __T>
313__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
314                                           double>::type
315scalbn(__T __x, int __exp) {
316  return std::scalbn((double)__x, __exp);
317}
318
319// We need to define these overloads in exactly the namespace our standard
320// library uses (including the right inline namespace), otherwise they won't be
321// picked up by other functions in the standard library (e.g. functions in
322// <complex>).  Thus the ugliness below.
323#ifdef _LIBCPP_BEGIN_NAMESPACE_STD
324_LIBCPP_BEGIN_NAMESPACE_STD
325#else
326namespace std {
327#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
328_GLIBCXX_BEGIN_NAMESPACE_VERSION
329#endif
330#endif
331
332// Pull the new overloads we defined above into namespace std.
333using ::acos;
334using ::acosh;
335using ::asin;
336using ::asinh;
337using ::atan;
338using ::atan2;
339using ::atanh;
340using ::cbrt;
341using ::ceil;
342using ::copysign;
343using ::cos;
344using ::cosh;
345using ::erf;
346using ::erfc;
347using ::exp;
348using ::exp2;
349using ::expm1;
350using ::fabs;
351using ::fdim;
352using ::floor;
353using ::fma;
354using ::fmax;
355using ::fmin;
356using ::fmod;
357using ::fpclassify;
358using ::frexp;
359using ::hypot;
360using ::ilogb;
361using ::isfinite;
362using ::isgreater;
363using ::isgreaterequal;
364using ::isless;
365using ::islessequal;
366using ::islessgreater;
367using ::isnormal;
368using ::isunordered;
369using ::ldexp;
370using ::lgamma;
371using ::llrint;
372using ::llround;
373using ::log;
374using ::log10;
375using ::log1p;
376using ::log2;
377using ::logb;
378using ::lrint;
379using ::lround;
380using ::nearbyint;
381using ::nextafter;
382using ::nexttoward;
383using ::pow;
384using ::remainder;
385using ::remquo;
386using ::rint;
387using ::round;
388using ::scalbln;
389using ::scalbn;
390using ::signbit;
391using ::sin;
392using ::sinh;
393using ::sqrt;
394using ::tan;
395using ::tanh;
396using ::tgamma;
397using ::trunc;
398
399// Well this is fun: We need to pull these symbols in for libc++, but we can't
400// pull them in with libstdc++, because its ::isinf and ::isnan are different
401// than its std::isinf and std::isnan.
402#ifndef __GLIBCXX__
403using ::isinf;
404using ::isnan;
405#endif
406
407// Finally, pull the "foobarf" functions that CUDA defines in its headers into
408// namespace std.
409using ::acosf;
410using ::acoshf;
411using ::asinf;
412using ::asinhf;
413using ::atan2f;
414using ::atanf;
415using ::atanhf;
416using ::cbrtf;
417using ::ceilf;
418using ::copysignf;
419using ::cosf;
420using ::coshf;
421using ::erfcf;
422using ::erff;
423using ::exp2f;
424using ::expf;
425using ::expm1f;
426using ::fabsf;
427using ::fdimf;
428using ::floorf;
429using ::fmaf;
430using ::fmaxf;
431using ::fminf;
432using ::fmodf;
433using ::frexpf;
434using ::hypotf;
435using ::ilogbf;
436using ::ldexpf;
437using ::lgammaf;
438using ::llrintf;
439using ::llroundf;
440using ::log10f;
441using ::log1pf;
442using ::log2f;
443using ::logbf;
444using ::logf;
445using ::lrintf;
446using ::lroundf;
447using ::modff;
448using ::nearbyintf;
449using ::nextafterf;
450using ::nexttowardf;
451using ::nexttowardf;
452using ::powf;
453using ::remainderf;
454using ::remquof;
455using ::rintf;
456using ::roundf;
457using ::scalblnf;
458using ::scalbnf;
459using ::sinf;
460using ::sinhf;
461using ::sqrtf;
462using ::tanf;
463using ::tanhf;
464using ::tgammaf;
465using ::truncf;
466
467#ifdef _LIBCPP_END_NAMESPACE_STD
468_LIBCPP_END_NAMESPACE_STD
469#else
470#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
471_GLIBCXX_END_NAMESPACE_VERSION
472#endif
473} // namespace std
474#endif
475
476#undef __DEVICE__
477
478#endif
479