; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s
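; Check that the annotate-kernel-features pass adds the expected "amdgpu-*"
; attributes to each function based on the intrinsics and addrspacecasts it
; uses (work-group/work-item IDs, dispatch ptr, queue ptr).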

declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0

declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
declare i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0

; HSA: define void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
define void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define void @use_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_tgid_x_y(i32 addrspace(1)* %ptr) #2 {
define void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_tgid_z(i32 addrspace(1)* %ptr) #3 {
define void @use_tgid_z(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_tgid_x_z(i32 addrspace(1)* %ptr) #3 {
define void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_tgid_y_z(i32 addrspace(1)* %ptr) #4 {
define void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #4 {
define void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
define void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_tidig_y(i32 addrspace(1)* %ptr) #5 {
define void @use_tidig_y(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_tidig_z(i32 addrspace(1)* %ptr) #6 {
define void @use_tidig_z(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
define void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #7 {
define void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #8 {
define void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_all_workitems(i32 addrspace(1)* %ptr) #9 {
define void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  %val3 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val4 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val5 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  store volatile i32 %val3, i32 addrspace(1)* %ptr
  store volatile i32 %val4, i32 addrspace(1)* %ptr
  store volatile i32 %val5, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #10 {
define void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
  %dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
  %bc = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
  %val = load i32, i32 addrspace(2)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_queue_ptr(i32 addrspace(1)* %ptr) #11 {
define void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 {
  %dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr()
  %bc = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
  %val = load i32, i32 addrspace(2)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

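; Casts from the group or private address spaces to flat need the queue ptr
; (for the apertures), so those functions should get "amdgpu-queue-ptr";
; casts from flat back to group/private should not.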
; HSA: define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #11 {
define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
  store volatile i32 0, i32 addrspace(4)* %stof
  ret void
}

; HSA: define void @use_private_to_flat_addrspacecast(i32* %ptr) #11 {
define void @use_private_to_flat_addrspacecast(i32* %ptr) #1 {
  %stof = addrspacecast i32* %ptr to i32 addrspace(4)*
  store volatile i32 0, i32 addrspace(4)* %stof
  ret void
}

; HSA: define void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #1 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(3)*
  store volatile i32 0, i32 addrspace(3)* %ftos
  ret void
}

; HSA: define void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #1 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32*
  store volatile i32 0, i32* %ftos
  ret void
}

; No-op addrspacecast should not use queue ptr
; HSA: define void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
define void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(1)* %ptr to i32 addrspace(4)*
  store volatile i32 0, i32 addrspace(4)* %stof
  ret void
}

; HSA: define void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 {
define void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(2)* %ptr to i32 addrspace(4)*
  %ld = load volatile i32, i32 addrspace(4)* %stof
  ret void
}

; HSA: define void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #1 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)*
  store volatile i32 0, i32 addrspace(1)* %ftos
  ret void
}

; HSA: define void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #1 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(2)*
  %ld = load volatile i32, i32 addrspace(2)* %ftos
  ret void
}

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

; HSA: attributes #0 = { nounwind readnone }
; HSA: attributes #1 = { nounwind }
; HSA: attributes #2 = { nounwind "amdgpu-work-group-id-y" }
; HSA: attributes #3 = { nounwind "amdgpu-work-group-id-z" }
; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" }
; HSA: attributes #5 = { nounwind "amdgpu-work-item-id-y" }
; HSA: attributes #6 = { nounwind "amdgpu-work-item-id-z" }
; HSA: attributes #7 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" }
; HSA: attributes #8 = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; HSA: attributes #9 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; HSA: attributes #10 = { nounwind "amdgpu-dispatch-ptr" }
; HSA: attributes #11 = { nounwind "amdgpu-queue-ptr" }