1/*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23#ifndef __CLANG_CUDA_CMATH_H__ 24#define __CLANG_CUDA_CMATH_H__ 25#ifndef __CUDA__ 26#error "This file is for CUDA compilation only." 27#endif 28 29#include <limits> 30 31// CUDA lets us use various std math functions on the device side. This file 32// works in concert with __clang_cuda_math_forward_declares.h to make this work. 33// 34// Specifically, the forward-declares header declares __device__ overloads for 35// these functions in the global namespace, then pulls them into namespace std 36// with 'using' statements. Then this file implements those functions, after 37// their implementations have been pulled in. 38// 39// It's important that we declare the functions in the global namespace and pull 40// them into namespace std with using statements, as opposed to simply declaring 41// these functions in namespace std, because our device functions need to 42// overload the standard library functions, which may be declared in the global 43// namespace or in std, depending on the degree of conformance of the stdlib 44// implementation. Declaring in the global namespace and pulling into namespace 45// std covers all of the known knowns. 46 47#define __DEVICE__ static __device__ __inline__ __attribute__((always_inline)) 48 49__DEVICE__ long long abs(long long __n) { return ::llabs(__n); } 50__DEVICE__ long abs(long __n) { return ::labs(__n); } 51__DEVICE__ float abs(float __x) { return ::fabsf(__x); } 52__DEVICE__ double abs(double __x) { return ::fabs(__x); } 53__DEVICE__ float acos(float __x) { return ::acosf(__x); } 54__DEVICE__ float asin(float __x) { return ::asinf(__x); } 55__DEVICE__ float atan(float __x) { return ::atanf(__x); } 56__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); } 57__DEVICE__ float ceil(float __x) { return ::ceilf(__x); } 58__DEVICE__ float cos(float __x) { return ::cosf(__x); } 59__DEVICE__ float cosh(float __x) { return ::coshf(__x); } 60__DEVICE__ float exp(float __x) { return ::expf(__x); } 61__DEVICE__ float fabs(float __x) { return ::fabsf(__x); } 62__DEVICE__ float floor(float __x) { return ::floorf(__x); } 63__DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); } 64__DEVICE__ int fpclassify(float __x) { 65 return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, 66 FP_ZERO, __x); 67} 68__DEVICE__ int fpclassify(double __x) { 69 return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, 70 FP_ZERO, __x); 71} 72__DEVICE__ float frexp(float __arg, int *__exp) { 73 return ::frexpf(__arg, __exp); 74} 75__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); } 76__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); } 77__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); } 78__DEVICE__ bool isfinite(double __x) { return ::__finite(__x); } 79__DEVICE__ bool isgreater(float __x, float __y) { 80 return __builtin_isgreater(__x, __y); 81} 82__DEVICE__ bool isgreater(double __x, double __y) { 83 return __builtin_isgreater(__x, __y); 84} 85__DEVICE__ bool isgreaterequal(float __x, float __y) { 86 return __builtin_isgreaterequal(__x, __y); 87} 88__DEVICE__ bool isgreaterequal(double __x, double __y) { 89 return __builtin_isgreaterequal(__x, __y); 90} 91__DEVICE__ bool isless(float __x, float __y) { 92 return __builtin_isless(__x, __y); 93} 94__DEVICE__ bool isless(double __x, double __y) { 95 return __builtin_isless(__x, __y); 96} 97__DEVICE__ bool islessequal(float __x, float __y) { 98 return __builtin_islessequal(__x, __y); 99} 100__DEVICE__ bool islessequal(double __x, double __y) { 101 return __builtin_islessequal(__x, __y); 102} 103__DEVICE__ bool islessgreater(float __x, float __y) { 104 return __builtin_islessgreater(__x, __y); 105} 106__DEVICE__ bool islessgreater(double __x, double __y) { 107 return __builtin_islessgreater(__x, __y); 108} 109__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); } 110__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); } 111__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); } 112__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); } 113__DEVICE__ bool isunordered(float __x, float __y) { 114 return __builtin_isunordered(__x, __y); 115} 116__DEVICE__ bool isunordered(double __x, double __y) { 117 return __builtin_isunordered(__x, __y); 118} 119__DEVICE__ float ldexp(float __arg, int __exp) { 120 return ::ldexpf(__arg, __exp); 121} 122__DEVICE__ float log(float __x) { return ::logf(__x); } 123__DEVICE__ float log10(float __x) { return ::log10f(__x); } 124__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); } 125__DEVICE__ float nexttoward(float __from, double __to) { 126 return __builtin_nexttowardf(__from, __to); 127} 128__DEVICE__ double nexttoward(double __from, double __to) { 129 return __builtin_nexttoward(__from, __to); 130} 131__DEVICE__ float nexttowardf(float __from, double __to) { 132 return __builtin_nexttowardf(__from, __to); 133} 134__DEVICE__ float pow(float __base, float __exp) { 135 return ::powf(__base, __exp); 136} 137__DEVICE__ float pow(float __base, int __iexp) { 138 return ::powif(__base, __iexp); 139} 140__DEVICE__ double pow(double __base, int __iexp) { 141 return ::powi(__base, __iexp); 142} 143__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); } 144__DEVICE__ bool signbit(double __x) { return ::__signbit(__x); } 145__DEVICE__ float sin(float __x) { return ::sinf(__x); } 146__DEVICE__ float sinh(float __x) { return ::sinhf(__x); } 147__DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); } 148__DEVICE__ float tan(float __x) { return ::tanf(__x); } 149__DEVICE__ float tanh(float __x) { return ::tanhf(__x); } 150 151// Now we've defined everything we promised we'd define in 152// __clang_cuda_math_forward_declares.h. We need to do two additional things to 153// fix up our math functions. 154// 155// 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define 156// only sin(float) and sin(double), which means that e.g. sin(0) is 157// ambiguous. 158// 159// 2) Pull the __device__ overloads of "foobarf" math functions into namespace 160// std. These are defined in the CUDA headers in the global namespace, 161// independent of everything else we've done here. 162 163// We can't use std::enable_if, because we want to be pre-C++11 compatible. But 164// we go ahead and unconditionally define functions that are only available when 165// compiling for C++11 to match the behavior of the CUDA headers. 166template<bool __B, class __T = void> 167struct __clang_cuda_enable_if {}; 168 169template <class __T> struct __clang_cuda_enable_if<true, __T> { 170 typedef __T type; 171}; 172 173// Defines an overload of __fn that accepts one integral argument, calls 174// __fn((double)x), and returns __retty. 175#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn) \ 176 template <typename __T> \ 177 __DEVICE__ \ 178 typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, \ 179 __retty>::type \ 180 __fn(__T __x) { \ 181 return ::__fn((double)__x); \ 182 } 183 184// Defines an overload of __fn that accepts one two arithmetic arguments, calls 185// __fn((double)x, (double)y), and returns a double. 186// 187// Note this is different from OVERLOAD_1, which generates an overload that 188// accepts only *integral* arguments. 189#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn) \ 190 template <typename __T1, typename __T2> \ 191 __DEVICE__ typename __clang_cuda_enable_if< \ 192 std::numeric_limits<__T1>::is_specialized && \ 193 std::numeric_limits<__T2>::is_specialized, \ 194 __retty>::type \ 195 __fn(__T1 __x, __T2 __y) { \ 196 return __fn((double)__x, (double)__y); \ 197 } 198 199__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos) 200__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh) 201__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin) 202__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh) 203__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan) 204__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2); 205__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh) 206__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt) 207__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil) 208__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign); 209__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos) 210__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh) 211__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf) 212__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc) 213__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp) 214__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2) 215__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1) 216__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs) 217__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim); 218__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor) 219__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax); 220__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin); 221__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod); 222__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify) 223__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot); 224__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb) 225__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite) 226__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater); 227__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal); 228__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf); 229__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless); 230__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal); 231__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater); 232__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan); 233__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal) 234__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered); 235__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma) 236__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log) 237__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10) 238__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p) 239__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2) 240__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb) 241__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint) 242__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround) 243__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint) 244__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround) 245__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint); 246__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter); 247__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow); 248__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder); 249__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint); 250__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round); 251__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit) 252__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin) 253__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh) 254__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt) 255__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan) 256__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh) 257__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma) 258__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc); 259 260#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1 261#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2 262 263// Overloads for functions that don't match the patterns expected by 264// __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}. 265template <typename __T1, typename __T2, typename __T3> 266__DEVICE__ typename __clang_cuda_enable_if< 267 std::numeric_limits<__T1>::is_specialized && 268 std::numeric_limits<__T2>::is_specialized && 269 std::numeric_limits<__T3>::is_specialized, 270 double>::type 271fma(__T1 __x, __T2 __y, __T3 __z) { 272 return std::fma((double)__x, (double)__y, (double)__z); 273} 274 275template <typename __T> 276__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, 277 double>::type 278frexp(__T __x, int *__exp) { 279 return std::frexp((double)__x, __exp); 280} 281 282template <typename __T> 283__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, 284 double>::type 285ldexp(__T __x, int __exp) { 286 return std::ldexp((double)__x, __exp); 287} 288 289template <typename __T> 290__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, 291 double>::type 292nexttoward(__T __from, double __to) { 293 return std::nexttoward((double)__from, __to); 294} 295 296template <typename __T1, typename __T2> 297__DEVICE__ typename __clang_cuda_enable_if< 298 std::numeric_limits<__T1>::is_specialized && 299 std::numeric_limits<__T2>::is_specialized, 300 double>::type 301remquo(__T1 __x, __T2 __y, int *__quo) { 302 return std::remquo((double)__x, (double)__y, __quo); 303} 304 305template <typename __T> 306__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, 307 double>::type 308scalbln(__T __x, long __exp) { 309 return std::scalbln((double)__x, __exp); 310} 311 312template <typename __T> 313__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, 314 double>::type 315scalbn(__T __x, int __exp) { 316 return std::scalbn((double)__x, __exp); 317} 318 319// We need to define these overloads in exactly the namespace our standard 320// library uses (including the right inline namespace), otherwise they won't be 321// picked up by other functions in the standard library (e.g. functions in 322// <complex>). Thus the ugliness below. 323#ifdef _LIBCPP_BEGIN_NAMESPACE_STD 324_LIBCPP_BEGIN_NAMESPACE_STD 325#else 326namespace std { 327#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION 328_GLIBCXX_BEGIN_NAMESPACE_VERSION 329#endif 330#endif 331 332// Pull the new overloads we defined above into namespace std. 333using ::acos; 334using ::acosh; 335using ::asin; 336using ::asinh; 337using ::atan; 338using ::atan2; 339using ::atanh; 340using ::cbrt; 341using ::ceil; 342using ::copysign; 343using ::cos; 344using ::cosh; 345using ::erf; 346using ::erfc; 347using ::exp; 348using ::exp2; 349using ::expm1; 350using ::fabs; 351using ::fdim; 352using ::floor; 353using ::fma; 354using ::fmax; 355using ::fmin; 356using ::fmod; 357using ::fpclassify; 358using ::frexp; 359using ::hypot; 360using ::ilogb; 361using ::isfinite; 362using ::isgreater; 363using ::isgreaterequal; 364using ::isless; 365using ::islessequal; 366using ::islessgreater; 367using ::isnormal; 368using ::isunordered; 369using ::ldexp; 370using ::lgamma; 371using ::llrint; 372using ::llround; 373using ::log; 374using ::log10; 375using ::log1p; 376using ::log2; 377using ::logb; 378using ::lrint; 379using ::lround; 380using ::nearbyint; 381using ::nextafter; 382using ::nexttoward; 383using ::pow; 384using ::remainder; 385using ::remquo; 386using ::rint; 387using ::round; 388using ::scalbln; 389using ::scalbn; 390using ::signbit; 391using ::sin; 392using ::sinh; 393using ::sqrt; 394using ::tan; 395using ::tanh; 396using ::tgamma; 397using ::trunc; 398 399// Well this is fun: We need to pull these symbols in for libc++, but we can't 400// pull them in with libstdc++, because its ::isinf and ::isnan are different 401// than its std::isinf and std::isnan. 402#ifndef __GLIBCXX__ 403using ::isinf; 404using ::isnan; 405#endif 406 407// Finally, pull the "foobarf" functions that CUDA defines in its headers into 408// namespace std. 409using ::acosf; 410using ::acoshf; 411using ::asinf; 412using ::asinhf; 413using ::atan2f; 414using ::atanf; 415using ::atanhf; 416using ::cbrtf; 417using ::ceilf; 418using ::copysignf; 419using ::cosf; 420using ::coshf; 421using ::erfcf; 422using ::erff; 423using ::exp2f; 424using ::expf; 425using ::expm1f; 426using ::fabsf; 427using ::fdimf; 428using ::floorf; 429using ::fmaf; 430using ::fmaxf; 431using ::fminf; 432using ::fmodf; 433using ::frexpf; 434using ::hypotf; 435using ::ilogbf; 436using ::ldexpf; 437using ::lgammaf; 438using ::llrintf; 439using ::llroundf; 440using ::log10f; 441using ::log1pf; 442using ::log2f; 443using ::logbf; 444using ::logf; 445using ::lrintf; 446using ::lroundf; 447using ::modff; 448using ::nearbyintf; 449using ::nextafterf; 450using ::nexttowardf; 451using ::nexttowardf; 452using ::powf; 453using ::remainderf; 454using ::remquof; 455using ::rintf; 456using ::roundf; 457using ::scalblnf; 458using ::scalbnf; 459using ::sinf; 460using ::sinhf; 461using ::sqrtf; 462using ::tanf; 463using ::tanhf; 464using ::tgammaf; 465using ::truncf; 466 467#ifdef _LIBCPP_END_NAMESPACE_STD 468_LIBCPP_END_NAMESPACE_STD 469#else 470#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION 471_GLIBCXX_END_NAMESPACE_VERSION 472#endif 473} // namespace std 474#endif 475 476#undef __DEVICE__ 477 478#endif 479