__clang_cuda_intrinsics.h revision 6bdbd720989797e8a53237ef3ef213c4114f869
16bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/*===--- __clang_cuda_intrinsics.h - Device-side CUDA intrinsic wrappers ---=== 26bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * 36bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * Permission is hereby granted, free of charge, to any person obtaining a copy 46bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * of this software and associated documentation files (the "Software"), to deal 56bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * in the Software without restriction, including without limitation the rights 66bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 76bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * copies of the Software, and to permit persons to whom the Software is 86bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * furnished to do so, subject to the following conditions: 96bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * 106bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * The above copyright notice and this permission notice shall be included in 116bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * all copies or substantial portions of the Software. 126bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * 136bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 146bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 156bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 166bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 176bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 186bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 196bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * THE SOFTWARE. 206bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * 216bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker *===-----------------------------------------------------------------------=== 226bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker */ 236bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#ifndef __CLANG_CUDA_INTRINSICS_H__ 246bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define __CLANG_CUDA_INTRINSICS_H__ 256bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#ifndef __CUDA__ 266bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#error "This file is for CUDA compilation only." 276bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#endif 286bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 296bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker// sm_32 intrinsics: __ldg and __funnelshift_{l,lc,r,rc}. 306bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 316bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker// Prevent the vanilla sm_32 intrinsics header from being included. 326bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define __SM_32_INTRINSICS_H__ 336bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define __SM_32_INTRINSICS_HPP__ 346bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 356bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320 366bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 376bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ char __ldg(const char *ptr) { return __nvvm_ldg_c(ptr); } 386bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ short __ldg(const short *ptr) { return __nvvm_ldg_s(ptr); } 396bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ int __ldg(const int *ptr) { return __nvvm_ldg_i(ptr); } 406bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ long __ldg(const long *ptr) { return __nvvm_ldg_l(ptr); } 416bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ long long __ldg(const long long *ptr) { 426bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return __nvvm_ldg_ll(ptr); 436bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 446bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ unsigned char __ldg(const unsigned char *ptr) { 456bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return __nvvm_ldg_uc(ptr); 466bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 476bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ unsigned short __ldg(const unsigned short *ptr) { 486bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return __nvvm_ldg_us(ptr); 496bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 506bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ unsigned int __ldg(const unsigned int *ptr) { 516bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return __nvvm_ldg_ui(ptr); 526bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 536bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ unsigned long __ldg(const unsigned long *ptr) { 546bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return __nvvm_ldg_ul(ptr); 556bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 566bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ unsigned long long __ldg(const unsigned long long *ptr) { 576bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return __nvvm_ldg_ull(ptr); 586bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 596bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ float __ldg(const float *ptr) { return __nvvm_ldg_f(ptr); } 606bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ double __ldg(const double *ptr) { return __nvvm_ldg_d(ptr); } 616bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 626bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ char2 __ldg(const char2 *ptr) { 636bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker typedef char c2 __attribute__((ext_vector_type(2))); 646bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker // We can assume that ptr is aligned at least to char2's alignment, but the 656bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker // load will assume that ptr is aligned to char2's alignment. This is only 666bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker // safe if alignof(c2) <= alignof(char2). 676bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker c2 rv = __nvvm_ldg_c2(reinterpret_cast<const c2 *>(ptr)); 686bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker char2 ret; 696bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.x = rv[0]; 706bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.y = rv[1]; 716bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return ret; 726bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 736bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ char4 __ldg(const char4 *ptr) { 746bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker typedef char c4 __attribute__((ext_vector_type(4))); 756bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker c4 rv = __nvvm_ldg_c4(reinterpret_cast<const c4 *>(ptr)); 766bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker char4 ret; 776bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.x = rv[0]; 786bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.y = rv[1]; 796bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.z = rv[2]; 806bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.w = rv[3]; 816bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return ret; 826bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 836bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ short2 __ldg(const short2 *ptr) { 846bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker typedef short s2 __attribute__((ext_vector_type(2))); 856bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker s2 rv = __nvvm_ldg_s2(reinterpret_cast<const s2 *>(ptr)); 866bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker short2 ret; 876bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.x = rv[0]; 886bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.y = rv[1]; 896bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return ret; 906bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 916bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ short4 __ldg(const short4 *ptr) { 926bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker typedef short s4 __attribute__((ext_vector_type(4))); 936bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker s4 rv = __nvvm_ldg_s4(reinterpret_cast<const s4 *>(ptr)); 946bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker short4 ret; 956bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.x = rv[0]; 966bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.y = rv[1]; 976bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.z = rv[2]; 986bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.w = rv[3]; 996bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return ret; 1006bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 1016bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ int2 __ldg(const int2 *ptr) { 1026bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker typedef int i2 __attribute__((ext_vector_type(2))); 1036bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker i2 rv = __nvvm_ldg_i2(reinterpret_cast<const i2 *>(ptr)); 1046bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker int2 ret; 1056bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.x = rv[0]; 1066bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.y = rv[1]; 1076bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return ret; 1086bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 1096bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ int4 __ldg(const int4 *ptr) { 1106bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker typedef int i4 __attribute__((ext_vector_type(4))); 1116bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker i4 rv = __nvvm_ldg_i4(reinterpret_cast<const i4 *>(ptr)); 1126bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker int4 ret; 1136bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.x = rv[0]; 1146bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.y = rv[1]; 1156bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.z = rv[2]; 1166bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.w = rv[3]; 1176bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return ret; 1186bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 1196bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ longlong2 __ldg(const longlong2 *ptr) { 1206bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker typedef long long ll2 __attribute__((ext_vector_type(2))); 1216bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ll2 rv = __nvvm_ldg_ll2(reinterpret_cast<const ll2 *>(ptr)); 1226bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker longlong2 ret; 1236bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.x = rv[0]; 1246bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.y = rv[1]; 1256bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return ret; 1266bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 1276bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 1286bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ uchar2 __ldg(const uchar2 *ptr) { 1296bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker typedef unsigned char uc2 __attribute__((ext_vector_type(2))); 1306bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker uc2 rv = __nvvm_ldg_uc2(reinterpret_cast<const uc2 *>(ptr)); 1316bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker uchar2 ret; 1326bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.x = rv[0]; 1336bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.y = rv[1]; 1346bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return ret; 1356bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 1366bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ uchar4 __ldg(const uchar4 *ptr) { 1376bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker typedef unsigned char uc4 __attribute__((ext_vector_type(4))); 1386bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker uc4 rv = __nvvm_ldg_uc4(reinterpret_cast<const uc4 *>(ptr)); 1396bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker uchar4 ret; 1406bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.x = rv[0]; 1416bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.y = rv[1]; 1426bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.z = rv[2]; 1436bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.w = rv[3]; 1446bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return ret; 1456bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 1466bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ ushort2 __ldg(const ushort2 *ptr) { 1476bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker typedef unsigned short us2 __attribute__((ext_vector_type(2))); 1486bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker us2 rv = __nvvm_ldg_us2(reinterpret_cast<const us2 *>(ptr)); 1496bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ushort2 ret; 1506bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.x = rv[0]; 1516bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.y = rv[1]; 1526bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return ret; 1536bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 1546bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ ushort4 __ldg(const ushort4 *ptr) { 1556bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker typedef unsigned short us4 __attribute__((ext_vector_type(4))); 1566bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker us4 rv = __nvvm_ldg_us4(reinterpret_cast<const us4 *>(ptr)); 1576bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ushort4 ret; 1586bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.x = rv[0]; 1596bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.y = rv[1]; 1606bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.z = rv[2]; 1616bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.w = rv[3]; 1626bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return ret; 1636bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 1646bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ uint2 __ldg(const uint2 *ptr) { 1656bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker typedef unsigned int ui2 __attribute__((ext_vector_type(2))); 1666bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ui2 rv = __nvvm_ldg_ui2(reinterpret_cast<const ui2 *>(ptr)); 1676bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker uint2 ret; 1686bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.x = rv[0]; 1696bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.y = rv[1]; 1706bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return ret; 1716bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 1726bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ uint4 __ldg(const uint4 *ptr) { 1736bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker typedef unsigned int ui4 __attribute__((ext_vector_type(4))); 1746bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ui4 rv = __nvvm_ldg_ui4(reinterpret_cast<const ui4 *>(ptr)); 1756bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker uint4 ret; 1766bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.x = rv[0]; 1776bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.y = rv[1]; 1786bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.z = rv[2]; 1796bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.w = rv[3]; 1806bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return ret; 1816bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 1826bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ ulonglong2 __ldg(const ulonglong2 *ptr) { 1836bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker typedef unsigned long long ull2 __attribute__((ext_vector_type(2))); 1846bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ull2 rv = __nvvm_ldg_ull2(reinterpret_cast<const ull2 *>(ptr)); 1856bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ulonglong2 ret; 1866bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.x = rv[0]; 1876bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.y = rv[1]; 1886bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return ret; 1896bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 1906bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 1916bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ float2 __ldg(const float2 *ptr) { 1926bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker typedef float f2 __attribute__((ext_vector_type(2))); 1936bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker f2 rv = __nvvm_ldg_f2(reinterpret_cast<const f2 *>(ptr)); 1946bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker float2 ret; 1956bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.x = rv[0]; 1966bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.y = rv[1]; 1976bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return ret; 1986bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 1996bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ float4 __ldg(const float4 *ptr) { 2006bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker typedef float f4 __attribute__((ext_vector_type(4))); 2016bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker f4 rv = __nvvm_ldg_f4(reinterpret_cast<const f4 *>(ptr)); 2026bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker float4 ret; 2036bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.x = rv[0]; 2046bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.y = rv[1]; 2056bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.z = rv[2]; 2066bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.w = rv[3]; 2076bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return ret; 2086bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 2096bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ double2 __ldg(const double2 *ptr) { 2106bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker typedef double d2 __attribute__((ext_vector_type(2))); 2116bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker d2 rv = __nvvm_ldg_d2(reinterpret_cast<const d2 *>(ptr)); 2126bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker double2 ret; 2136bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.x = rv[0]; 2146bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ret.y = rv[1]; 2156bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return ret; 2166bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 2176bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 2186bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker// TODO: Implement these as intrinsics, so the backend can work its magic on 2196bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker// these. Alternatively, we could implement these as plain C and try to get 2206bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker// llvm to recognize the relevant patterns. 2216bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ unsigned __funnelshift_l(unsigned low32, unsigned high32, 2226bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker unsigned shiftWidth) { 2236bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker unsigned result; 2246bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker asm("shf.l.wrap.b32 %0, %1, %2, %3;" 2256bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker : "=r"(result) 2266bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker : "r"(low32), "r"(high32), "r"(shiftWidth)); 2276bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return result; 2286bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 2296bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ unsigned __funnelshift_lc(unsigned low32, unsigned high32, 2306bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker unsigned shiftWidth) { 2316bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker unsigned result; 2326bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker asm("shf.l.clamp.b32 %0, %1, %2, %3;" 2336bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker : "=r"(result) 2346bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker : "r"(low32), "r"(high32), "r"(shiftWidth)); 2356bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return result; 2366bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 2376bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ unsigned __funnelshift_r(unsigned low32, unsigned high32, 2386bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker unsigned shiftWidth) { 2396bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker unsigned result; 2406bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker asm("shf.r.wrap.b32 %0, %1, %2, %3;" 2416bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker : "=r"(result) 2426bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker : "r"(low32), "r"(high32), "r"(shiftWidth)); 2436bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return result; 2446bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 2456bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerinline __device__ unsigned __funnelshift_rc(unsigned low32, unsigned high32, 2466bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker unsigned shiftWidth) { 2476bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker unsigned ret; 2486bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker asm("shf.r.clamp.b32 %0, %1, %2, %3;" 2496bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker : "=r"(ret) 2506bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker : "r"(low32), "r"(high32), "r"(shiftWidth)); 2516bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return ret; 2526bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 2536bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 2546bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320 2556bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 2566bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#endif // defined(__CLANG_CUDA_INTRINSICS_H__) 257