// REQUIRES: nvptx-registered-target
// RUN: %clang_cc1 -triple nvptx-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s

// Layout used throughout this file: the expected-output directive for a
// builtin sits directly above the statement that invokes it.  The directives
// are matched against the emitted LLVM IR in the order they appear here, so
// the statements must stay in this order.

// Thread-id builtins: each one is expected to become a call to the
// llvm.ptx.read.tid.* intrinsic with the same suffix.
int read_tid() {
// CHECK: call i32 @llvm.ptx.read.tid.x()
  int tid_x = __builtin_ptx_read_tid_x();
// CHECK: call i32 @llvm.ptx.read.tid.y()
  int tid_y = __builtin_ptx_read_tid_y();
// CHECK: call i32 @llvm.ptx.read.tid.z()
  int tid_z = __builtin_ptx_read_tid_z();
// CHECK: call i32 @llvm.ptx.read.tid.w()
  int tid_w = __builtin_ptx_read_tid_w();

  // Sum the results so every value is used.
  return tid_x + tid_y + tid_z + tid_w;
}

// Same pattern for the llvm.ptx.read.ntid.* intrinsics.
int read_ntid() {
// CHECK: call i32 @llvm.ptx.read.ntid.x()
  int ntid_x = __builtin_ptx_read_ntid_x();
// CHECK: call i32 @llvm.ptx.read.ntid.y()
  int ntid_y = __builtin_ptx_read_ntid_y();
// CHECK: call i32 @llvm.ptx.read.ntid.z()
  int ntid_z = __builtin_ptx_read_ntid_z();
// CHECK: call i32 @llvm.ptx.read.ntid.w()
  int ntid_w = __builtin_ptx_read_ntid_w();

  return ntid_x + ntid_y + ntid_z + ntid_w;
}

// Same pattern for the llvm.ptx.read.ctaid.* intrinsics.
int read_ctaid() {
// CHECK: call i32 @llvm.ptx.read.ctaid.x()
  int ctaid_x = __builtin_ptx_read_ctaid_x();
// CHECK: call i32 @llvm.ptx.read.ctaid.y()
  int ctaid_y = __builtin_ptx_read_ctaid_y();
// CHECK: call i32 @llvm.ptx.read.ctaid.z()
  int ctaid_z = __builtin_ptx_read_ctaid_z();
// CHECK: call i32 @llvm.ptx.read.ctaid.w()
  int ctaid_w = __builtin_ptx_read_ctaid_w();

  return ctaid_x + ctaid_y + ctaid_z + ctaid_w;
}

// Same pattern for the llvm.ptx.read.nctaid.* intrinsics.
int read_nctaid() {
// CHECK: call i32 @llvm.ptx.read.nctaid.x()
  int nctaid_x = __builtin_ptx_read_nctaid_x();
// CHECK: call i32 @llvm.ptx.read.nctaid.y()
  int nctaid_y = __builtin_ptx_read_nctaid_y();
// CHECK: call i32 @llvm.ptx.read.nctaid.z()
  int nctaid_z = __builtin_ptx_read_nctaid_z();
// CHECK: call i32 @llvm.ptx.read.nctaid.w()
  int nctaid_w = __builtin_ptx_read_nctaid_w();

  return nctaid_x + nctaid_y + nctaid_z + nctaid_w;
}

// Builtins for laneid, warpid, nwarpid, smid, nsmid and gridid.
int read_ids() {
// CHECK: call i32 @llvm.ptx.read.laneid()
  int lane = __builtin_ptx_read_laneid();
// CHECK: call i32 @llvm.ptx.read.warpid()
  int warp = __builtin_ptx_read_warpid();
// CHECK: call i32 @llvm.ptx.read.nwarpid()
  int nwarp = __builtin_ptx_read_nwarpid();
// CHECK: call i32 @llvm.ptx.read.smid()
  int sm = __builtin_ptx_read_smid();
// CHECK: call i32 @llvm.ptx.read.nsmid()
  int nsm = __builtin_ptx_read_nsmid();
// CHECK: call i32 @llvm.ptx.read.gridid()
  int grid = __builtin_ptx_read_gridid();

  return lane + warp + nwarp + sm + nsm + grid;
}

// Builtins for the five lanemask variants (eq, le, lt, ge, gt).
int read_lanemasks() {
// CHECK: call i32 @llvm.ptx.read.lanemask.eq()
  int mask_eq = __builtin_ptx_read_lanemask_eq();
// CHECK: call i32 @llvm.ptx.read.lanemask.le()
  int mask_le = __builtin_ptx_read_lanemask_le();
// CHECK: call i32 @llvm.ptx.read.lanemask.lt()
  int mask_lt = __builtin_ptx_read_lanemask_lt();
// CHECK: call i32 @llvm.ptx.read.lanemask.ge()
  int mask_ge = __builtin_ptx_read_lanemask_ge();
// CHECK: call i32 @llvm.ptx.read.lanemask.gt()
  int mask_gt = __builtin_ptx_read_lanemask_gt();

  return mask_eq + mask_le + mask_lt + mask_ge + mask_gt;
}


// Clock builtins: the expected IR has an i32 call for clock and an i64 call
// for clock64.
long read_clocks() {
// CHECK: call i32 @llvm.ptx.read.clock()
  int clk = __builtin_ptx_read_clock();
// CHECK: call i64 @llvm.ptx.read.clock64()
  long clk64 = __builtin_ptx_read_clock64();

  // Widen the int result to long before adding.
  return (long)clk + clk64;
}

// Builtins pm0 through pm3.
int read_pms() {
// CHECK: call i32 @llvm.ptx.read.pm0()
  int pm0 = __builtin_ptx_read_pm0();
// CHECK: call i32 @llvm.ptx.read.pm1()
  int pm1 = __builtin_ptx_read_pm1();
// CHECK: call i32 @llvm.ptx.read.pm2()
  int pm2 = __builtin_ptx_read_pm2();
// CHECK: call i32 @llvm.ptx.read.pm3()
  int pm3 = __builtin_ptx_read_pm3();

  return pm0 + pm1 + pm2 + pm3;
}

// bar.sync builtin; the literal argument 0 must show up as i32 0 in the call.
void sync() {
// CHECK: call void @llvm.ptx.bar.sync(i32 0)
  __builtin_ptx_bar_sync(0);
}


// NVVM intrinsics
//
// This is a sample, not exhaustive coverage: the goal is only to confirm that
// Clang recognizes the builtins defined in BuiltinsNVPTX.def.
void nvvm_math(float f1, float f2, double d1, double d2) {
  // Single-precision builtins.
// CHECK: call float @llvm.nvvm.fmax.f
  float max_f = __nvvm_fmax_f(f1, f2);
// CHECK: call float @llvm.nvvm.fmin.f
  float min_f = __nvvm_fmin_f(f1, f2);
// CHECK: call float @llvm.nvvm.sqrt.rn.f
  float sqrt_f = __nvvm_sqrt_rn_f(f1);
// CHECK: call float @llvm.nvvm.rcp.rn.f
  float rcp_f = __nvvm_rcp_rn_f(f2);

  // Double-precision builtins.
// CHECK: call double @llvm.nvvm.fmax.d
  double max_d = __nvvm_fmax_d(d1, d2);
// CHECK: call double @llvm.nvvm.fmin.d
  double min_d = __nvvm_fmin_d(d1, d2);
// CHECK: call double @llvm.nvvm.sqrt.rn.d
  double sqrt_d = __nvvm_sqrt_rn_d(d1);
// CHECK: call double @llvm.nvvm.rcp.rn.d
  double rcp_d = __nvvm_rcp_rn_d(d2);

  // Builtins that take no arguments and return nothing.
// CHECK: call void @llvm.nvvm.membar.cta()
  __nvvm_membar_cta();
// CHECK: call void @llvm.nvvm.membar.gl()
  __nvvm_membar_gl();
// CHECK: call void @llvm.nvvm.membar.sys()
  __nvvm_membar_sys();
// CHECK: call void @llvm.nvvm.barrier0()
  __nvvm_bar0();
}