1/*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23#ifndef __CLANG_CUDA_CMATH_H__ 24#define __CLANG_CUDA_CMATH_H__ 25#ifndef __CUDA__ 26#error "This file is for CUDA compilation only." 27#endif 28 29#include <limits> 30 31// CUDA lets us use various std math functions on the device side. This file 32// works in concert with __clang_cuda_math_forward_declares.h to make this work. 33// 34// Specifically, the forward-declares header declares __device__ overloads for 35// these functions in the global namespace, then pulls them into namespace std 36// with 'using' statements. Then this file implements those functions, after 37// their implementations have been pulled in. 
//
// It's important that we declare the functions in the global namespace and pull
// them into namespace std with using statements, as opposed to simply declaring
// these functions in namespace std, because our device functions need to
// overload the standard library functions, which may be declared in the global
// namespace or in std, depending on the degree of conformance of the stdlib
// implementation. Declaring in the global namespace and pulling into namespace
// std covers all of the known knowns.

// Shorthand for the attribute set every device-side overload below carries.
#define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))

// Device-side overloads of the basic <cmath> entry points. Each one simply
// forwards to the corresponding C-style CUDA function (e.g. abs(float) ->
// ::fabsf) so the C++ spellings work in device code.
__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
__DEVICE__ long abs(long __n) { return ::labs(__n); }
__DEVICE__ float abs(float __x) { return ::fabsf(__x); }
__DEVICE__ double abs(double __x) { return ::fabs(__x); }
__DEVICE__ float acos(float __x) { return ::acosf(__x); }
__DEVICE__ float asin(float __x) { return ::asinf(__x); }
__DEVICE__ float atan(float __x) { return ::atanf(__x); }
__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
__DEVICE__ float ceil(float __x) { return ::ceilf(__x); }
__DEVICE__ float cos(float __x) { return ::cosf(__x); }
__DEVICE__ float cosh(float __x) { return ::coshf(__x); }
__DEVICE__ float exp(float __x) { return ::expf(__x); }
__DEVICE__ float fabs(float __x) { return ::fabsf(__x); }
__DEVICE__ float floor(float __x) { return ::floorf(__x); }
__DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
// Classify __x into the standard FP_* categories via the clang builtin.
__DEVICE__ int fpclassify(float __x) {
  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                              FP_ZERO, __x);
}
__DEVICE__ int fpclassify(double __x) {
  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                              FP_ZERO, __x);
}
__DEVICE__ float frexp(float __arg, int *__exp) {
  return ::frexpf(__arg, __exp);
}

// For inscrutable reasons, the CUDA headers define these functions for us on
// Windows.
#ifndef _MSC_VER
__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
// For inscrutable reasons, __finite(), the double-precision version of
// __finitef, does not exist when compiling for MacOS. __isfinited is available
// everywhere and is just as good.
__DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
#endif

// The quiet floating-point comparison functions, implemented with the
// corresponding clang builtins.
__DEVICE__ bool isgreater(float __x, float __y) {
  return __builtin_isgreater(__x, __y);
}
__DEVICE__ bool isgreater(double __x, double __y) {
  return __builtin_isgreater(__x, __y);
}
__DEVICE__ bool isgreaterequal(float __x, float __y) {
  return __builtin_isgreaterequal(__x, __y);
}
__DEVICE__ bool isgreaterequal(double __x, double __y) {
  return __builtin_isgreaterequal(__x, __y);
}
__DEVICE__ bool isless(float __x, float __y) {
  return __builtin_isless(__x, __y);
}
__DEVICE__ bool isless(double __x, double __y) {
  return __builtin_isless(__x, __y);
}
__DEVICE__ bool islessequal(float __x, float __y) {
  return __builtin_islessequal(__x, __y);
}
__DEVICE__ bool islessequal(double __x, double __y) {
  return __builtin_islessequal(__x, __y);
}
__DEVICE__ bool islessgreater(float __x, float __y) {
  return __builtin_islessgreater(__x, __y);
}
__DEVICE__ bool islessgreater(double __x, double __y) {
  return __builtin_islessgreater(__x, __y);
}
__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
__DEVICE__ bool isunordered(float __x, float __y) {
  return __builtin_isunordered(__x, __y);
}
__DEVICE__ bool isunordered(double __x, double __y) {
  return __builtin_isunordered(__x, __y);
}
__DEVICE__ float ldexp(float __arg, int __exp) {
  return ::ldexpf(__arg, __exp);
}
__DEVICE__ float log(float __x) { return ::logf(__x); }
__DEVICE__ float log10(float __x) { return ::log10f(__x); }
__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
// nexttoward takes a double second argument in all overloads, per the C++
// standard; implemented with the clang builtins.
__DEVICE__ float nexttoward(float __from, double __to) {
  return __builtin_nexttowardf(__from, __to);
}
__DEVICE__ double nexttoward(double __from, double __to) {
  return __builtin_nexttoward(__from, __to);
}
__DEVICE__ float nexttowardf(float __from, double __to) {
  return __builtin_nexttowardf(__from, __to);
}
__DEVICE__ float pow(float __base, float __exp) {
  return ::powf(__base, __exp);
}
// Integer-exponent pow overloads forward to CUDA's powif/powi helpers.
__DEVICE__ float pow(float __base, int __iexp) {
  return ::powif(__base, __iexp);
}
__DEVICE__ double pow(double __base, int __iexp) {
  return ::powi(__base, __iexp);
}
__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
__DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }
__DEVICE__ float sin(float __x) { return ::sinf(__x); }
__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
__DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
__DEVICE__ float tan(float __x) { return ::tanf(__x); }
__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }

// Now we've defined everything we promised we'd define in
// __clang_cuda_math_forward_declares.h. We need to do two additional things to
// fix up our math functions.
//
// 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define
//    only sin(float) and sin(double), which means that e.g. sin(0) is
//    ambiguous.
//
// 2) Pull the __device__ overloads of "foobarf" math functions into namespace
//    std. These are defined in the CUDA headers in the global namespace,
//    independent of everything else we've done here.

// We can't use std::enable_if, because we want to be pre-C++11 compatible. But
// we go ahead and unconditionally define functions that are only available when
// compiling for C++11 to match the behavior of the CUDA headers.

// Minimal SFINAE helper equivalent to std::enable_if: the nested 'type'
// typedef exists only in the __B == true specialization, so overloads using
// it drop out of resolution when the condition is false.
template<bool __B, class __T = void>
struct __clang_cuda_enable_if {};

template <class __T> struct __clang_cuda_enable_if<true, __T> {
  typedef __T type;
};

// Defines an overload of __fn that accepts one integral argument, calls
// __fn((double)x), and returns __retty.
#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn)                      \
  template <typename __T>                                                      \
  __DEVICE__                                                                   \
      typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,    \
                                      __retty>::type                           \
      __fn(__T __x) {                                                          \
    return ::__fn((double)__x);                                                \
  }

// Defines an overload of __fn that accepts two arithmetic arguments, calls
// __fn((double)x, (double)y), and returns __retty.
//
// Note this is different from OVERLOAD_1, which generates an overload that
// accepts only *integral* arguments.
#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn)                      \
  template <typename __T1, typename __T2>                                      \
  __DEVICE__ typename __clang_cuda_enable_if<                                  \
      std::numeric_limits<__T1>::is_specialized &&                             \
          std::numeric_limits<__T2>::is_specialized,                           \
      __retty>::type                                                           \
  __fn(__T1 __x, __T2 __y) {                                                   \
    return __fn((double)__x, (double)__y);                                     \
  }

// Instantiate the integral/arithmetic overloads promised in
// __clang_cuda_math_forward_declares.h. Each generated overload promotes its
// argument(s) to double and forwards to the double version.
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);

// The helper macros are implementation details; keep them out of user code.
#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1
#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2

// Overloads for functions that don't match the patterns expected by
// __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.
// fma takes three arguments, so it can't use OVERLOAD_2; accept any three
// arithmetic types and promote them all to double.
template <typename __T1, typename __T2, typename __T3>
__DEVICE__ typename __clang_cuda_enable_if<
    std::numeric_limits<__T1>::is_specialized &&
        std::numeric_limits<__T2>::is_specialized &&
        std::numeric_limits<__T3>::is_specialized,
    double>::type
fma(__T1 __x, __T2 __y, __T3 __z) {
  return std::fma((double)__x, (double)__y, (double)__z);
}

// frexp's second parameter is an out-pointer, not an arithmetic value.
template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
frexp(__T __x, int *__exp) {
  return std::frexp((double)__x, __exp);
}

// ldexp's second parameter is always int, so only the first is templated.
template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
ldexp(__T __x, int __exp) {
  return std::ldexp((double)__x, __exp);
}

// nexttoward's second parameter is always double, per the C++ standard.
template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
nexttoward(__T __from, double __to) {
  return std::nexttoward((double)__from, __to);
}

// remquo has an int* out-parameter for the quotient in addition to the two
// arithmetic operands.
template <typename __T1, typename __T2>
__DEVICE__ typename __clang_cuda_enable_if<
    std::numeric_limits<__T1>::is_specialized &&
        std::numeric_limits<__T2>::is_specialized,
    double>::type
remquo(__T1 __x, __T2 __y, int *__quo) {
  return std::remquo((double)__x, (double)__y, __quo);
}

// scalbln/scalbn take a fixed-type exponent (long / int respectively).
template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
scalbln(__T __x, long __exp) {
  return std::scalbln((double)__x, __exp);
}

template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
scalbn(__T __x, int __exp) {
  return std::scalbn((double)__x, __exp);
}

// We need to define these overloads in exactly the namespace our standard
// library uses (including the right inline namespace), otherwise they won't be
// picked up by other functions in the standard library (e.g. functions in
// <complex>). Thus the ugliness below: open libc++'s versioned std namespace
// if its macro exists, otherwise plain 'namespace std', additionally entering
// libstdc++'s version namespace when that macro exists.
#ifdef _LIBCPP_BEGIN_NAMESPACE_STD
_LIBCPP_BEGIN_NAMESPACE_STD
#else
namespace std {
#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_BEGIN_NAMESPACE_VERSION
#endif
#endif

// Pull the new overloads we defined above into namespace std.
using ::acos;
using ::acosh;
using ::asin;
using ::asinh;
using ::atan;
using ::atan2;
using ::atanh;
using ::cbrt;
using ::ceil;
using ::copysign;
using ::cos;
using ::cosh;
using ::erf;
using ::erfc;
using ::exp;
using ::exp2;
using ::expm1;
using ::fabs;
using ::fdim;
using ::floor;
using ::fma;
using ::fmax;
using ::fmin;
using ::fmod;
using ::fpclassify;
using ::frexp;
using ::hypot;
using ::ilogb;
using ::isfinite;
using ::isgreater;
using ::isgreaterequal;
using ::isless;
using ::islessequal;
using ::islessgreater;
using ::isnormal;
using ::isunordered;
using ::ldexp;
using ::lgamma;
using ::llrint;
using ::llround;
using ::log;
using ::log10;
using ::log1p;
using ::log2;
using ::logb;
using ::lrint;
using ::lround;
using ::nearbyint;
using ::nextafter;
using ::nexttoward;
using ::pow;
using ::remainder;
using ::remquo;
using ::rint;
using ::round;
using ::scalbln;
using ::scalbn;
using ::signbit;
using ::sin;
using ::sinh;
using ::sqrt;
using ::tan;
using ::tanh;
using ::tgamma;
using ::trunc;

// Well this is fun: We need to pull these symbols in for libc++, but we can't
// pull them in with libstdc++, because its ::isinf and ::isnan are different
// than its std::isinf and std::isnan.
#ifndef __GLIBCXX__
using ::isinf;
using ::isnan;
#endif

// Finally, pull the "foobarf" functions that CUDA defines in its headers into
// namespace std.
// The C-style single-precision "foobarf" functions, declared by the CUDA
// headers in the global namespace, brought into std one apiece.
// (The redundant duplicate 'using ::nexttowardf;' declaration that used to
// follow nextafterf has been removed; one declaration per name suffices.)
using ::acosf;
using ::acoshf;
using ::asinf;
using ::asinhf;
using ::atan2f;
using ::atanf;
using ::atanhf;
using ::cbrtf;
using ::ceilf;
using ::copysignf;
using ::cosf;
using ::coshf;
using ::erfcf;
using ::erff;
using ::exp2f;
using ::expf;
using ::expm1f;
using ::fabsf;
using ::fdimf;
using ::floorf;
using ::fmaf;
using ::fmaxf;
using ::fminf;
using ::fmodf;
using ::frexpf;
using ::hypotf;
using ::ilogbf;
using ::ldexpf;
using ::lgammaf;
using ::llrintf;
using ::llroundf;
using ::log10f;
using ::log1pf;
using ::log2f;
using ::logbf;
using ::logf;
using ::lrintf;
using ::lroundf;
using ::modff;
using ::nearbyintf;
using ::nextafterf;
using ::nexttowardf;
using ::powf;
using ::remainderf;
using ::remquof;
using ::rintf;
using ::roundf;
using ::scalblnf;
using ::scalbnf;
using ::sinf;
using ::sinhf;
using ::sqrtf;
using ::tanf;
using ::tanhf;
using ::tgammaf;
using ::truncf;

// Close whichever std namespace form was opened above (libc++ macro,
// libstdc++ version namespace, or plain 'namespace std').
#ifdef _LIBCPP_END_NAMESPACE_STD
_LIBCPP_END_NAMESPACE_STD
#else
#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_END_NAMESPACE_VERSION
#endif
} // namespace std
#endif

#undef __DEVICE__

#endif