1a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar/*===---- __clang_cuda_runtime_wrapper.h - CUDA runtime support -------------=== 2a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * 3a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * Permission is hereby granted, free of charge, to any person obtaining a copy 4a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * of this software and associated documentation files (the "Software"), to deal 5a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * in the Software without restriction, including without limitation the rights 6a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * copies of the Software, and to permit persons to whom the Software is 8a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * furnished to do so, subject to the following conditions: 9a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * 10a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * The above copyright notice and this permission notice shall be included in 11a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * all copies or substantial portions of the Software. 12a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * 13a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * THE SOFTWARE. 20a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * 21a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar *===-----------------------------------------------------------------------=== 22a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar */ 23a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 24a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar/* 25a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * WARNING: This header is intended to be directly -include'd by 26a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * the compiler and is not supposed to be included by users. 27a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * 28a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * CUDA headers are implemented in a way that currently makes it 29a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * impossible for user code to #include directly when compiling with 30a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * Clang. They present different view of CUDA-supplied functions 31a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * depending on where in NVCC's compilation pipeline the headers are 32a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * included. Neither of these modes provides function definitions with 33a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * correct attributes, so we use preprocessor to force the headers 34a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * into a form that Clang can use. 35a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * 36a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * Similarly to NVCC which -include's cuda_runtime.h, Clang -include's 37a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * this file during every CUDA compilation. 38a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar */ 39a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 40a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#ifndef __CLANG_CUDA_RUNTIME_WRAPPER_H__ 41a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __CLANG_CUDA_RUNTIME_WRAPPER_H__ 42a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 43a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#if defined(__CUDA__) && defined(__clang__) 44a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 45a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// Include some standard headers to avoid CUDA headers including them 46a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// while some required macros (like __THROW) are in a weird state. 47a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include <stdlib.h> 48a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include <cmath> 49a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 50a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// Preserve common macros that will be changed below by us or by CUDA 51a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// headers. 52a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#pragma push_macro("__THROW") 53a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#pragma push_macro("__CUDA_ARCH__") 54a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 55a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// WARNING: Preprocessor hacks below are based on specific details of 56a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// CUDA-7.x headers and are not expected to work with any other 57a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// version of CUDA headers. 58a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "cuda.h" 59a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#if !defined(CUDA_VERSION) 60a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#error "cuda.h did not define CUDA_VERSION" 61a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#elif CUDA_VERSION < 7000 || CUDA_VERSION > 7050 62a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#error "Unsupported CUDA version!" 63a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#endif 64a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 65a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// Make largest subset of device functions available during host 66a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// compilation -- SM_35 for the time being. 67a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#ifndef __CUDA_ARCH__ 68a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __CUDA_ARCH__ 350 69a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#endif 70a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 71a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "cuda_builtin_vars.h" 72a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 73a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// No need for device_launch_parameters.h as cuda_builtin_vars.h above 74a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// has taken care of builtin variables declared in the file. 75a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __DEVICE_LAUNCH_PARAMETERS_H__ 76a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 77a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// {math,device}_functions.h only have declarations of the 78a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// functions. We don't need them as we're going to pull in their 79a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// definitions from .hpp files. 80a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __DEVICE_FUNCTIONS_H__ 81a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __MATH_FUNCTIONS_H__ 82a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 83a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __CUDACC__ 84a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __CUDABE__ 85a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// Disables definitions of device-side runtime support stubs in 86a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// cuda_device_runtime_api.h 87a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __CUDADEVRT_INTERNAL__ 88a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "host_config.h" 89a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "host_defines.h" 90a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "driver_types.h" 91a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "common_functions.h" 92a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __CUDADEVRT_INTERNAL__ 93a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 94a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __CUDABE__ 95a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __CUDACC__ 96a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "cuda_runtime.h" 97a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 98a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __CUDACC__ 99a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __CUDABE__ 100a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 101a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// CUDA headers use __nvvm_memcpy and __nvvm_memset which Clang does 102a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// not have at the moment. Emulate them with a builtin memcpy/memset. 103a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __nvvm_memcpy(s,d,n,a) __builtin_memcpy(s,d,n) 104a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __nvvm_memset(d,c,n,a) __builtin_memset(d,c,n) 105a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 106a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "crt/host_runtime.h" 107a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "crt/device_runtime.h" 108a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// device_runtime.h defines __cxa_* macros that will conflict with 109a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// cxxabi.h. 110a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// FIXME: redefine these as __device__ functions. 111a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __cxa_vec_ctor 112a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __cxa_vec_cctor 113a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __cxa_vec_dtor 114a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __cxa_vec_new2 115a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __cxa_vec_new3 116a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __cxa_vec_delete2 117a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __cxa_vec_delete 118a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __cxa_vec_delete3 119a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __cxa_pure_virtual 120a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 121a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// We need decls for functions in CUDA's libdevice with __device__ 122a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// attribute only. Alas they come either as __host__ __device__ or 123a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// with no attributes at all. To work around that, define __CUDA_RTC__ 124a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// which produces HD variant and undef __host__ which gives us desided 125a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// decls with __device__ attribute. 126a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#pragma push_macro("__host__") 127a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __host__ 128a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __CUDACC_RTC__ 129a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "device_functions_decls.h" 130a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __CUDACC_RTC__ 131a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 132a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// Temporarily poison __host__ macro to ensure it's not used by any of 133a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// the headers we're about to include. 134a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __host__ UNEXPECTED_HOST_ATTRIBUTE 135a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 136a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// device_functions.hpp and math_functions*.hpp use 'static 137a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// __forceinline__' (with no __device__) for definitions of device 138a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// functions. Temporarily redefine __forceinline__ to include 139a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// __device__. 140a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#pragma push_macro("__forceinline__") 141a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __forceinline__ __device__ __inline__ __attribute__((always_inline)) 142a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "device_functions.hpp" 143a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "math_functions.hpp" 144a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "math_functions_dbl_ptx3.hpp" 145a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#pragma pop_macro("__forceinline__") 146a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 147a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// Pull in host-only functions that are only available when neither 148a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// __CUDACC__ nor __CUDABE__ are defined. 149a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __MATH_FUNCTIONS_HPP__ 150a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __CUDABE__ 151a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "math_functions.hpp" 152a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// Alas, additional overloads for these functions are hard to get to. 153a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// Considering that we only need these overloads for a few functions, 154a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// we can provide them here. 155a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainarstatic inline float rsqrt(float a) { return rsqrtf(a); } 156a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainarstatic inline float rcbrt(float a) { return rcbrtf(a); } 157a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainarstatic inline float sinpi(float a) { return sinpif(a); } 158a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainarstatic inline float cospi(float a) { return cospif(a); } 159a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainarstatic inline void sincospi(float a, float *b, float *c) { 160a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar return sincospi(a, b, c); 161a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar} 162a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainarstatic inline float erfcinv(float a) { return erfcinvf(a); } 163a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainarstatic inline float normcdfinv(float a) { return normcdfinvf(a); } 164a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainarstatic inline float normcdf(float a) { return normcdff(a); } 165a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainarstatic inline float erfcx(float a) { return erfcxf(a); } 166a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 167a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// For some reason single-argument variant is not always declared by 168a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// CUDA headers. Alas, device_functions.hpp included below needs it. 169a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainarstatic inline __device__ void __brkpt(int c) { __brkpt(); } 170a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 171a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// Now include *.hpp with definitions of various GPU functions. Alas, 172a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// a lot of thins get declared/defined with __host__ attribute which 173a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// we don't want and we have to define it out. We also have to include 174a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// {device,math}_functions.hpp again in order to extract the other 175a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// branch of #if/else inside. 176a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 177a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __host__ 178a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __CUDABE__ 179a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __CUDACC__ 180a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __DEVICE_FUNCTIONS_HPP__ 181a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "device_functions.hpp" 182a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "device_atomic_functions.hpp" 183a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "sm_20_atomic_functions.hpp" 184a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "sm_32_atomic_functions.hpp" 185a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "sm_20_intrinsics.hpp" 186a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// sm_30_intrinsics.h has declarations that use default argument, so 187a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// we have to include it and it will in turn include .hpp 188a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "sm_30_intrinsics.h" 189a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "sm_32_intrinsics.hpp" 190a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __MATH_FUNCTIONS_HPP__ 191a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "math_functions.hpp" 192a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#pragma pop_macro("__host__") 193a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 194a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "texture_indirect_functions.h" 195a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 196a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// Restore state of __CUDA_ARCH__ and __THROW we had on entry. 197a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#pragma pop_macro("__CUDA_ARCH__") 198a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#pragma pop_macro("__THROW") 199a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 200a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// Set up compiler macros expected to be seen during compilation. 201a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __CUDABE__ 202a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __CUDACC__ 203a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __NVCC__ 204a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 205a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#if defined(__CUDA_ARCH__) 206a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// We need to emit IR declaration for non-existing __nvvm_reflect() to 207a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// let backend know that it should be treated as const nothrow 208a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// function which is what NVVMReflect pass expects to see. 209a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainarextern "C" __device__ __attribute__((const)) int __nvvm_reflect(const void *); 210a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainarstatic __device__ __attribute__((used)) int __nvvm_reflect_anchor() { 211a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar return __nvvm_reflect("NONE"); 212a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar} 213a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#endif 214a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar 215a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#endif // __CUDA__ 216a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#endif // __CLANG_CUDA_RUNTIME_WRAPPER_H__ 217