1a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar/*===---- __clang_cuda_runtime_wrapper.h - CUDA runtime support -------------===
2a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar *
3a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * Permission is hereby granted, free of charge, to any person obtaining a copy
4a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * of this software and associated documentation files (the "Software"), to deal
5a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * in the Software without restriction, including without limitation the rights
6a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * copies of the Software, and to permit persons to whom the Software is
8a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * furnished to do so, subject to the following conditions:
9a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar *
10a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * The above copyright notice and this permission notice shall be included in
11a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * all copies or substantial portions of the Software.
12a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar *
13a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * THE SOFTWARE.
20a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar *
21a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar *===-----------------------------------------------------------------------===
22a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar */
23a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
24a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar/*
25a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * WARNING: This header is intended to be directly -include'd by
26a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * the compiler and is not supposed to be included by users.
27a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar *
28a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * CUDA headers are implemented in a way that currently makes it
29a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * impossible for user code to #include directly when compiling with
30a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * Clang. They present different view of CUDA-supplied functions
31a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * depending on where in NVCC's compilation pipeline the headers are
32a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * included. Neither of these modes provides function definitions with
33a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * correct attributes, so we use preprocessor to force the headers
34a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * into a form that Clang can use.
35a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar *
36a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * Similarly to NVCC which -include's cuda_runtime.h, Clang -include's
37a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar * this file during every CUDA compilation.
38a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar */
39a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
40a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#ifndef __CLANG_CUDA_RUNTIME_WRAPPER_H__
41a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __CLANG_CUDA_RUNTIME_WRAPPER_H__
42a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
43a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#if defined(__CUDA__) && defined(__clang__)
44a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
45a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// Include some standard headers to avoid CUDA headers including them
46a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// while some required macros (like __THROW) are in a weird state.
47a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include <stdlib.h>
48a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include <cmath>
49a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
50a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// Preserve common macros that will be changed below by us or by CUDA
51a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// headers.
52a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#pragma push_macro("__THROW")
53a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#pragma push_macro("__CUDA_ARCH__")
54a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
55a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// WARNING: Preprocessor hacks below are based on specific details of
56a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// CUDA-7.x headers and are not expected to work with any other
57a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// version of CUDA headers.
58a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "cuda.h"
59a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#if !defined(CUDA_VERSION)
60a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#error "cuda.h did not define CUDA_VERSION"
61a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#elif CUDA_VERSION < 7000 || CUDA_VERSION > 7050
62a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#error "Unsupported CUDA version!"
63a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#endif
64a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
65a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// Make largest subset of device functions available during host
66a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// compilation -- SM_35 for the time being.
67a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#ifndef __CUDA_ARCH__
68a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __CUDA_ARCH__ 350
69a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#endif
70a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
71a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "cuda_builtin_vars.h"
72a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
73a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// No need for device_launch_parameters.h as cuda_builtin_vars.h above
74a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// has taken care of builtin variables declared in the file.
75a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __DEVICE_LAUNCH_PARAMETERS_H__
76a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
77a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// {math,device}_functions.h only have declarations of the
78a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// functions. We don't need them as we're going to pull in their
79a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// definitions from .hpp files.
80a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __DEVICE_FUNCTIONS_H__
81a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __MATH_FUNCTIONS_H__
82a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
83a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __CUDACC__
84a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __CUDABE__
85a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// Disables definitions of device-side runtime support stubs in
86a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// cuda_device_runtime_api.h
87a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __CUDADEVRT_INTERNAL__
88a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "host_config.h"
89a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "host_defines.h"
90a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "driver_types.h"
91a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "common_functions.h"
92a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __CUDADEVRT_INTERNAL__
93a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
94a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __CUDABE__
95a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __CUDACC__
96a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "cuda_runtime.h"
97a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
98a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __CUDACC__
99a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __CUDABE__
100a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
101a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// CUDA headers use __nvvm_memcpy and __nvvm_memset which Clang does
102a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// not have at the moment. Emulate them with a builtin memcpy/memset.
103a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __nvvm_memcpy(s,d,n,a) __builtin_memcpy(s,d,n)
104a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __nvvm_memset(d,c,n,a) __builtin_memset(d,c,n)
105a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
106a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "crt/host_runtime.h"
107a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "crt/device_runtime.h"
108a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// device_runtime.h defines __cxa_* macros that will conflict with
109a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// cxxabi.h.
110a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// FIXME: redefine these as __device__ functions.
111a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __cxa_vec_ctor
112a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __cxa_vec_cctor
113a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __cxa_vec_dtor
114a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __cxa_vec_new2
115a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __cxa_vec_new3
116a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __cxa_vec_delete2
117a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __cxa_vec_delete
118a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __cxa_vec_delete3
119a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __cxa_pure_virtual
120a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
121a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// We need decls for functions in CUDA's libdevice with __device__
122a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// attribute only. Alas they come either as __host__ __device__ or
123a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// with no attributes at all. To work around that, define __CUDA_RTC__
124a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// which produces HD variant and undef __host__ which gives us desided
125a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// decls with __device__ attribute.
126a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#pragma push_macro("__host__")
127a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __host__
128a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __CUDACC_RTC__
129a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "device_functions_decls.h"
130a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __CUDACC_RTC__
131a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
132a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// Temporarily poison __host__ macro to ensure it's not used by any of
133a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// the headers we're about to include.
134a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __host__ UNEXPECTED_HOST_ATTRIBUTE
135a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
136a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// device_functions.hpp and math_functions*.hpp use 'static
137a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// __forceinline__' (with no __device__) for definitions of device
138a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// functions. Temporarily redefine __forceinline__ to include
139a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// __device__.
140a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#pragma push_macro("__forceinline__")
141a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __forceinline__ __device__ __inline__ __attribute__((always_inline))
142a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "device_functions.hpp"
143a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "math_functions.hpp"
144a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "math_functions_dbl_ptx3.hpp"
145a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#pragma pop_macro("__forceinline__")
146a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
147a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// Pull in host-only functions that are only available when neither
148a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// __CUDACC__ nor __CUDABE__ are defined.
149a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __MATH_FUNCTIONS_HPP__
150a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __CUDABE__
151a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "math_functions.hpp"
152a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// Alas, additional overloads for these functions are hard to get to.
153a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// Considering that we only need these overloads for a few functions,
154a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// we can provide them here.
155a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainarstatic inline float rsqrt(float a) { return rsqrtf(a); }
156a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainarstatic inline float rcbrt(float a) { return rcbrtf(a); }
157a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainarstatic inline float sinpi(float a) { return sinpif(a); }
158a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainarstatic inline float cospi(float a) { return cospif(a); }
159a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainarstatic inline void sincospi(float a, float *b, float *c) {
160a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar  return sincospi(a, b, c);
161a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar}
162a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainarstatic inline float erfcinv(float a) { return erfcinvf(a); }
163a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainarstatic inline float normcdfinv(float a) { return normcdfinvf(a); }
164a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainarstatic inline float normcdf(float a) { return normcdff(a); }
165a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainarstatic inline float erfcx(float a) { return erfcxf(a); }
166a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
167a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// For some reason single-argument variant is not always declared by
168a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// CUDA headers. Alas, device_functions.hpp included below needs it.
169a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainarstatic inline __device__ void __brkpt(int c) { __brkpt(); }
170a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
171a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// Now include *.hpp with definitions of various GPU functions.  Alas,
172a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// a lot of thins get declared/defined with __host__ attribute which
173a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// we don't want and we have to define it out. We also have to include
174a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// {device,math}_functions.hpp again in order to extract the other
175a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// branch of #if/else inside.
176a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
177a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __host__
178a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __CUDABE__
179a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __CUDACC__
180a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __DEVICE_FUNCTIONS_HPP__
181a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "device_functions.hpp"
182a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "device_atomic_functions.hpp"
183a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "sm_20_atomic_functions.hpp"
184a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "sm_32_atomic_functions.hpp"
185a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "sm_20_intrinsics.hpp"
186a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// sm_30_intrinsics.h has declarations that use default argument, so
187a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// we have to include it and it will in turn include .hpp
188a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "sm_30_intrinsics.h"
189a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "sm_32_intrinsics.hpp"
190a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __MATH_FUNCTIONS_HPP__
191a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "math_functions.hpp"
192a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#pragma pop_macro("__host__")
193a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
194a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#include "texture_indirect_functions.h"
195a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
196a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// Restore state of __CUDA_ARCH__ and __THROW we had on entry.
197a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#pragma pop_macro("__CUDA_ARCH__")
198a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#pragma pop_macro("__THROW")
199a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
200a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// Set up compiler macros expected to be seen during compilation.
201a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#undef __CUDABE__
202a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __CUDACC__
203a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#define __NVCC__
204a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
205a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#if defined(__CUDA_ARCH__)
206a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// We need to emit IR declaration for non-existing __nvvm_reflect() to
207a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// let backend know that it should be treated as const nothrow
208a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar// function which is what NVVMReflect pass expects to see.
209a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainarextern "C" __device__ __attribute__((const)) int __nvvm_reflect(const void *);
210a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainarstatic __device__ __attribute__((used)) int __nvvm_reflect_anchor() {
211a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar  return __nvvm_reflect("NONE");
212a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar}
213a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#endif
214a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar
215a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#endif // __CUDA__
216a4de17562d13d7a8188108243c4cfbd52f33229aPirama Arumuga Nainar#endif // __CLANG_CUDA_RUNTIME_WRAPPER_H__
217