1// REQUIRES: nvptx-registered-target
2// RUN: %clang_cc1 -triple nvptx-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
3// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
4
// Exercises the four __builtin_ptx_read_tid_{x,y,z,w} builtins. The expected
// intrinsic call is listed directly above the builtin that should produce it,
// and the results are summed so that every value is used.
int read_tid() {
  int total = 0;

// CHECK: call i32 @llvm.ptx.read.tid.x()
  total += __builtin_ptx_read_tid_x();
// CHECK: call i32 @llvm.ptx.read.tid.y()
  total += __builtin_ptx_read_tid_y();
// CHECK: call i32 @llvm.ptx.read.tid.z()
  total += __builtin_ptx_read_tid_z();
// CHECK: call i32 @llvm.ptx.read.tid.w()
  total += __builtin_ptx_read_tid_w();

  return total;
}
20
// Exercises the four __builtin_ptx_read_ntid_{x,y,z,w} builtins. The expected
// intrinsic call is listed directly above the builtin that should produce it,
// and the results are summed so that every value is used.
int read_ntid() {
  int total = 0;

// CHECK: call i32 @llvm.ptx.read.ntid.x()
  total += __builtin_ptx_read_ntid_x();
// CHECK: call i32 @llvm.ptx.read.ntid.y()
  total += __builtin_ptx_read_ntid_y();
// CHECK: call i32 @llvm.ptx.read.ntid.z()
  total += __builtin_ptx_read_ntid_z();
// CHECK: call i32 @llvm.ptx.read.ntid.w()
  total += __builtin_ptx_read_ntid_w();

  return total;
}
36
// Exercises the four __builtin_ptx_read_ctaid_{x,y,z,w} builtins. The expected
// intrinsic call is listed directly above the builtin that should produce it,
// and the results are summed so that every value is used.
int read_ctaid() {
  int total = 0;

// CHECK: call i32 @llvm.ptx.read.ctaid.x()
  total += __builtin_ptx_read_ctaid_x();
// CHECK: call i32 @llvm.ptx.read.ctaid.y()
  total += __builtin_ptx_read_ctaid_y();
// CHECK: call i32 @llvm.ptx.read.ctaid.z()
  total += __builtin_ptx_read_ctaid_z();
// CHECK: call i32 @llvm.ptx.read.ctaid.w()
  total += __builtin_ptx_read_ctaid_w();

  return total;
}
52
// Exercises the four __builtin_ptx_read_nctaid_{x,y,z,w} builtins. The expected
// intrinsic call is listed directly above the builtin that should produce it,
// and the results are summed so that every value is used.
int read_nctaid() {
  int total = 0;

// CHECK: call i32 @llvm.ptx.read.nctaid.x()
  total += __builtin_ptx_read_nctaid_x();
// CHECK: call i32 @llvm.ptx.read.nctaid.y()
  total += __builtin_ptx_read_nctaid_y();
// CHECK: call i32 @llvm.ptx.read.nctaid.z()
  total += __builtin_ptx_read_nctaid_z();
// CHECK: call i32 @llvm.ptx.read.nctaid.w()
  total += __builtin_ptx_read_nctaid_w();

  return total;
}
68
// Exercises the laneid, warpid, nwarpid, smid, nsmid and gridid read builtins.
// The expected intrinsic call is listed directly above the builtin that should
// produce it, and the results are summed so that every value is used.
int read_ids() {
  int total = 0;

// CHECK: call i32 @llvm.ptx.read.laneid()
  total += __builtin_ptx_read_laneid();
// CHECK: call i32 @llvm.ptx.read.warpid()
  total += __builtin_ptx_read_warpid();
// CHECK: call i32 @llvm.ptx.read.nwarpid()
  total += __builtin_ptx_read_nwarpid();
// CHECK: call i32 @llvm.ptx.read.smid()
  total += __builtin_ptx_read_smid();
// CHECK: call i32 @llvm.ptx.read.nsmid()
  total += __builtin_ptx_read_nsmid();
// CHECK: call i32 @llvm.ptx.read.gridid()
  total += __builtin_ptx_read_gridid();

  return total;
}
88
// Exercises the five __builtin_ptx_read_lanemask_{eq,le,lt,ge,gt} builtins.
// The expected intrinsic call is listed directly above the builtin that should
// produce it, and the results are summed so that every value is used.
int read_lanemasks() {
  int total = 0;

// CHECK: call i32 @llvm.ptx.read.lanemask.eq()
  total += __builtin_ptx_read_lanemask_eq();
// CHECK: call i32 @llvm.ptx.read.lanemask.le()
  total += __builtin_ptx_read_lanemask_le();
// CHECK: call i32 @llvm.ptx.read.lanemask.lt()
  total += __builtin_ptx_read_lanemask_lt();
// CHECK: call i32 @llvm.ptx.read.lanemask.ge()
  total += __builtin_ptx_read_lanemask_ge();
// CHECK: call i32 @llvm.ptx.read.lanemask.gt()
  total += __builtin_ptx_read_lanemask_gt();

  return total;
}
106
107
// Exercises __builtin_ptx_read_clock and __builtin_ptx_read_clock64 and
// returns the sum of both readings so that each value is used.
//
// The expected IR below shows the second builtin yielding an i64. `long` is
// not guaranteed to be 64 bits wide on every triple this file is compiled for,
// so the 64-bit reading is held in a `long long` and the only narrowing is the
// explicit cast at the return.
long read_clocks() {

// CHECK: call i32 @llvm.ptx.read.clock()
// CHECK: call i64 @llvm.ptx.read.clock64()

  int a = __builtin_ptx_read_clock();
  long long b = __builtin_ptx_read_clock64();

  // `a` is converted to long long here, so the addition happens in 64 bits
  // before the result is narrowed to the declared return type.
  return (long)(a + b);

}
119
// Exercises the four __builtin_ptx_read_pm{0,1,2,3} builtins. The expected
// intrinsic call is listed directly above the builtin that should produce it,
// and the results are summed so that every value is used.
int read_pms() {
  int total = 0;

// CHECK: call i32 @llvm.ptx.read.pm0()
  total += __builtin_ptx_read_pm0();
// CHECK: call i32 @llvm.ptx.read.pm1()
  total += __builtin_ptx_read_pm1();
// CHECK: call i32 @llvm.ptx.read.pm2()
  total += __builtin_ptx_read_pm2();
// CHECK: call i32 @llvm.ptx.read.pm3()
  total += __builtin_ptx_read_pm3();

  return total;
}
135
// Issues one __builtin_ptx_bar_sync call; the expected IR pins its operand to
// the constant i32 0.
void sync() {
  // An enumerator is an integer constant expression, so the call still
  // receives a constant operand rather than a value loaded from a local.
  enum { bar_sync_operand = 0 };

// CHECK: call void @llvm.ptx.bar.sync(i32 0)
  __builtin_ptx_bar_sync(bar_sync_operand);
}
143
144
145// NVVM intrinsics
146
// The goal is not to test every intrinsic, only to verify that Clang recognizes
// the builtins defined in BuiltinsNVPTX.def.
void nvvm_math(float f1, float f2, double d1, double d2) {
  // Each group lists its expected calls first, in the same order as the
  // builtin invocations that follow. The results are intentionally unused;
  // only the emitted calls matter here.

  // Single-precision max / min / sqrt / reciprocal.
// CHECK: call float @llvm.nvvm.fmax.f
// CHECK: call float @llvm.nvvm.fmin.f
// CHECK: call float @llvm.nvvm.sqrt.rn.f
// CHECK: call float @llvm.nvvm.rcp.rn.f
  float max_f = __nvvm_fmax_f(f1, f2);
  float min_f = __nvvm_fmin_f(f1, f2);
  float sqrt_f = __nvvm_sqrt_rn_f(f1);
  float rcp_f = __nvvm_rcp_rn_f(f2);

  // Double-precision max / min / sqrt / reciprocal.
// CHECK: call double @llvm.nvvm.fmax.d
// CHECK: call double @llvm.nvvm.fmin.d
// CHECK: call double @llvm.nvvm.sqrt.rn.d
// CHECK: call double @llvm.nvvm.rcp.rn.d
  double max_d = __nvvm_fmax_d(d1, d2);
  double min_d = __nvvm_fmin_d(d1, d2);
  double sqrt_d = __nvvm_sqrt_rn_d(d1);
  double rcp_d = __nvvm_rcp_rn_d(d2);

  // Memory barriers (cta, gl, sys) followed by barrier0.
// CHECK: call void @llvm.nvvm.membar.cta()
// CHECK: call void @llvm.nvvm.membar.gl()
// CHECK: call void @llvm.nvvm.membar.sys()
// CHECK: call void @llvm.nvvm.barrier0()
  __nvvm_membar_cta();
  __nvvm_membar_gl();
  __nvvm_membar_sys();
  __nvvm_bar0();
}
177