1; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
2; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
3
; Atomic decrement intrinsic overloads on addrspace(1) pointers.
declare i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* nocapture, i32) #2
declare i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* nocapture, i64) #2

; Atomic decrement intrinsic overloads on addrspace(3) pointers.
declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32) #2
declare i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* nocapture, i64) #2

; Atomic decrement intrinsic overloads on addrspace(4) pointers.
declare i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* nocapture, i32) #2
declare i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* nocapture, i64) #2

; Work-item id query used by the per-lane (addr64) tests.
declare i32 @llvm.amdgcn.workitem.id.x() #1
13
; 32-bit atomic dec through an addrspace(3) pointer with operand 42; the
; intrinsic result is consumed by a store, so the returning (rtn) form of the
; DS instruction is expected.
; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
  %val = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42)
  store i32 %val, i32 addrspace(1)* %out
  ret void
}
22
; Same as the plain returning i32 case, but the pointer is advanced by four
; i32 elements first; the check expects that to appear as offset:16 on the
; DS instruction.
; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
  %elt = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %val = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %elt, i32 42)
  store i32 %val, i32 addrspace(1)* %out
  ret void
}
32
; 32-bit atomic dec through an addrspace(3) pointer whose result is unused, so
; the non-returning DS instruction is expected.
;
; The label uses the GCN prefix because no RUN line enables a FUNC prefix; a
; FUNC label would be ignored and the checks below would not be tied to this
; function. The data register must hold the full constant 42 passed in the IR.
; The two register moves are matched in either order.
; GCN-LABEL: {{^}}lds_atomic_dec_noret_i32:
; GCN: s_load_dword [[SPTR:s[0-9]+]],
; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_dec_u32 [[VPTR]], [[DATA]]
define void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42)
  ret void
}
42
; Non-returning 32-bit atomic dec through an addrspace(3) pointer advanced by
; four i32 elements; the check expects offset:16 on the DS instruction.
;
; The label uses the GCN prefix because no RUN line enables a FUNC prefix, so
; a FUNC label would never be checked.
; GCN-LABEL: {{^}}lds_atomic_dec_noret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_dec_u32 v{{[0-9]+}}, [[K]] offset:16
define void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %gep, i32 42)
  ret void
}
51
; 32-bit atomic dec through an addrspace(1) pointer with operand 42; the
; result is stored to %out, and the check expects the buffer atomic to carry
; the glc bit and nothing after it.
; GCN-LABEL: {{^}}global_atomic_dec_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
define void @global_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %val = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42)
  store i32 %val, i32 addrspace(1)* %out
  ret void
}
60
; Returning 32-bit atomic dec through an addrspace(1) pointer advanced by four
; i32 elements; the check expects offset:16 followed by glc on the buffer
; atomic.
; GCN-LABEL: {{^}}global_atomic_dec_ret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
define void @global_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %elt = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
  %val = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %elt, i32 42)
  store i32 %val, i32 addrspace(1)* %out
  ret void
}
70
; 32-bit atomic dec through an addrspace(1) pointer whose result is unused;
; the check expects the buffer atomic without glc.
;
; The label uses the GCN prefix because no RUN line enables a FUNC prefix.
; [[K]] is captured inside this block so the instruction check does not rely
; on a register captured while matching an earlier function.
; GCN-LABEL: {{^}}global_atomic_dec_noret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
define void @global_atomic_dec_noret_i32(i32 addrspace(1)* %ptr) nounwind {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42)
  ret void
}
77
; Non-returning 32-bit atomic dec through an addrspace(1) pointer advanced by
; four i32 elements; the check expects offset:16 and no glc.
;
; The label uses the GCN prefix because no RUN line enables a FUNC prefix, so
; a FUNC label would never be checked.
; GCN-LABEL: {{^}}global_atomic_dec_noret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
define void @global_atomic_dec_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
  ret void
}
86
; Returning 32-bit atomic dec on an addrspace(1) address that depends on the
; work-item id plus a constant five-element offset. The bonaire run expects a
; buffer atomic in addr64 form with offset:20; the tonga run expects a flat
; atomic with no offset operand.
; GCN-LABEL: {{^}}global_atomic_dec_ret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CI: buffer_atomic_dec [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
; VI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
define void @global_atomic_dec_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.ptr = getelementptr i32, i32 addrspace(1)* %ptr, i32 %tid
  %lane.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %elt = getelementptr i32, i32 addrspace(1)* %lane.ptr, i32 5
  %val = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %elt, i32 42)
  store i32 %val, i32 addrspace(1)* %lane.out
  ret void
}
100
; Non-returning 32-bit atomic dec on an addrspace(1) address that depends on
; the work-item id plus a constant five-element offset. The bonaire run
; expects a buffer atomic in addr64 form with offset:20 and no glc; the tonga
; run expects a flat atomic with no trailing operands.
; GCN-LABEL: {{^}}global_atomic_dec_noret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CI: buffer_atomic_dec [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
; VI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
define void @global_atomic_dec_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.ptr = getelementptr i32, i32 addrspace(1)* %ptr, i32 %tid
  %elt = getelementptr i32, i32 addrspace(1)* %lane.ptr, i32 5
  %unused = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %elt, i32 42)
  ret void
}
112
; 32-bit atomic dec through an addrspace(4) pointer with operand 42; the
; result is stored to %out and the check expects a flat atomic ending in glc.
; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
define void @flat_atomic_dec_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
  %val = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %ptr, i32 42)
  store i32 %val, i32 addrspace(4)* %out
  ret void
}
121
; Returning 32-bit atomic dec through an addrspace(4) pointer advanced by four
; i32 elements. The check pattern is the same as the no-offset case: the line
; must end right after glc, so no offset operand is expected on the flat
; instruction.
; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
define void @flat_atomic_dec_ret_i32_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
  %elt = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
  %val = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %elt, i32 42)
  store i32 %val, i32 addrspace(4)* %out
  ret void
}
131
; 32-bit atomic dec through an addrspace(4) pointer whose result is unused;
; the check expects a flat atomic with no destination register and no glc.
;
; The label uses the GCN prefix because no RUN line enables a FUNC prefix.
; [[K]] is captured inside this block so the instruction check does not rely
; on a register captured while matching an earlier function.
; GCN-LABEL: {{^}}flat_atomic_dec_noret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
define void @flat_atomic_dec_noret_i32(i32 addrspace(4)* %ptr) nounwind {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %ptr, i32 42)
  ret void
}
138
; Non-returning 32-bit atomic dec through an addrspace(4) pointer advanced by
; four i32 elements; the check expects a flat atomic with no trailing
; operands.
;
; The label uses the GCN prefix because no RUN line enables a FUNC prefix, so
; a FUNC label would never be checked.
; GCN-LABEL: {{^}}flat_atomic_dec_noret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
define void @flat_atomic_dec_noret_i32_offset(i32 addrspace(4)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %gep, i32 42)
  ret void
}
147
; Returning 32-bit atomic dec on an addrspace(4) address that depends on the
; work-item id plus a constant five-element offset; the result is stored to a
; per-work-item slot of %out. Both runs expect a flat atomic ending in glc.
; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
define void @flat_atomic_dec_ret_i32_offset_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.ptr = getelementptr i32, i32 addrspace(4)* %ptr, i32 %tid
  %lane.out = getelementptr i32, i32 addrspace(4)* %out, i32 %tid
  %elt = getelementptr i32, i32 addrspace(4)* %lane.ptr, i32 5
  %val = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %elt, i32 42)
  store i32 %val, i32 addrspace(4)* %lane.out
  ret void
}
160
; Non-returning 32-bit atomic dec on an addrspace(4) address that depends on
; the work-item id plus a constant five-element offset. Both runs expect a
; flat atomic with no trailing operands.
; GCN-LABEL: {{^}}flat_atomic_dec_noret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
define void @flat_atomic_dec_noret_i32_offset_addr64(i32 addrspace(4)* %ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.ptr = getelementptr i32, i32 addrspace(4)* %ptr, i32 %tid
  %elt = getelementptr i32, i32 addrspace(4)* %lane.ptr, i32 5
  %unused = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %elt, i32 42)
  ret void
}
171
; 64-bit atomic dec through an addrspace(4) pointer with operand 42. The
; checks expect the constant as a register pair (low half 42, high half 0)
; feeding a returning x2 flat atomic.
; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
define void @flat_atomic_dec_ret_i64(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
  %val = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %ptr, i64 42)
  store i64 %val, i64 addrspace(4)* %out
  ret void
}
181
; Returning 64-bit atomic dec through an addrspace(4) pointer advanced by four
; i64 elements. The instruction check must end right after glc, so no offset
; operand is expected on the flat instruction.
; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
define void @flat_atomic_dec_ret_i64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
  %elt = getelementptr i64, i64 addrspace(4)* %ptr, i32 4
  %val = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %elt, i64 42)
  store i64 %val, i64 addrspace(4)* %out
  ret void
}
192
; 64-bit atomic dec through an addrspace(4) pointer whose result is unused;
; the check expects an x2 flat atomic with only the address and data pairs.
;
; The label uses the GCN prefix because no RUN line enables a FUNC prefix, so
; a FUNC label would never be checked.
; GCN-LABEL: {{^}}flat_atomic_dec_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
define void @flat_atomic_dec_noret_i64(i64 addrspace(4)* %ptr) nounwind {
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %ptr, i64 42)
  ret void
}
201
; Non-returning 64-bit atomic dec through an addrspace(4) pointer advanced by
; four i64 elements; the check expects an x2 flat atomic with no trailing
; operands.
;
; The label uses the GCN prefix because no RUN line enables a FUNC prefix, so
; a FUNC label would never be checked.
; GCN-LABEL: {{^}}flat_atomic_dec_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
define void @flat_atomic_dec_noret_i64_offset(i64 addrspace(4)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(4)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %gep, i64 42)
  ret void
}
211
; Returning 64-bit atomic dec on an addrspace(4) address that depends on the
; work-item id plus a constant five-element offset; the result is stored to a
; per-work-item slot of %out. Both runs expect an x2 flat atomic ending in glc.
; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64_offset_addr64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
define void @flat_atomic_dec_ret_i64_offset_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.ptr = getelementptr i64, i64 addrspace(4)* %ptr, i32 %tid
  %lane.out = getelementptr i64, i64 addrspace(4)* %out, i32 %tid
  %elt = getelementptr i64, i64 addrspace(4)* %lane.ptr, i32 5
  %val = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %elt, i64 42)
  store i64 %val, i64 addrspace(4)* %lane.out
  ret void
}
225
; Non-returning 64-bit atomic dec on an addrspace(4) address that depends on
; the work-item id plus a constant five-element offset. Both runs expect an x2
; flat atomic with no trailing operands.
; GCN-LABEL: {{^}}flat_atomic_dec_noret_i64_offset_addr64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
define void @flat_atomic_dec_noret_i64_offset_addr64(i64 addrspace(4)* %ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.ptr = getelementptr i64, i64 addrspace(4)* %ptr, i32 %tid
  %elt = getelementptr i64, i64 addrspace(4)* %lane.ptr, i32 5
  %unused = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %elt, i64 42)
  ret void
}
237
; addrspace(3) array of 512 i32 elements indexed by the test below.
@lds0 = addrspace(3) global [512 x i32] undef

; Atomic dec on element (workitem id + 2) of @lds0, where the incremented
; index has a second use (it is stored to %add_use). The checks expect the
; address to be formed by shifting the work-item id left by 2, with the
; constant +2 elements appearing as offset:8 on the DS instruction.
;
; The checks use the GCN prefix because no RUN line enables an SI prefix, so
; SI checks would never run. The instruction pattern lists the data operand
; between the address register and the offset, mirroring the pattern used for
; the 64-bit variant of this test.
; GCN-LABEL: {{^}}atomic_dec_shl_base_lds_0:
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_dec_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
define void @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0
  %val0 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9)
  ; Second use of the index, so it cannot simply be folded away into the address.
  store i32 %idx.0, i32 addrspace(1)* %add_use
  store i32 %val0, i32 addrspace(1)* %out
  ret void
}
252
; 64-bit atomic dec through an addrspace(3) pointer with operand 42. The
; checks expect the constant as a register pair (low half 42, high half 0)
; feeding the returning u64 DS instruction, with nothing after the data pair.
; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
  %val = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42)
  store i64 %val, i64 addrspace(1)* %out
  ret void
}
262
; Returning 64-bit atomic dec through an addrspace(3) pointer advanced by four
; i64 elements; the check expects that to appear as offset:32 on the DS
; instruction.
; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32
define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
  %elt = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %val = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %elt, i64 42)
  store i64 %val, i64 addrspace(1)* %out
  ret void
}
273
; 64-bit atomic dec through an addrspace(3) pointer whose result is unused;
; the check expects the non-returning u64 DS instruction with no offset.
;
; The label uses the GCN prefix because no RUN line enables a FUNC prefix, so
; a FUNC label would never be checked.
; GCN-LABEL: {{^}}lds_atomic_dec_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_dec_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42)
  ret void
}
282
; Non-returning 64-bit atomic dec through an addrspace(3) pointer advanced by
; four i64 elements; the check expects offset:32 on the DS instruction.
;
; The label uses the GCN prefix because no RUN line enables a FUNC prefix, so
; a FUNC label would never be checked.
; GCN-LABEL: {{^}}lds_atomic_dec_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_dec_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
define void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %gep, i64 42)
  ret void
}
292
; 64-bit atomic dec through an addrspace(1) pointer with operand 42; the
; result is stored to %out. The checks expect the constant register pair to
; be the data operand of an x2 buffer atomic ending in glc.
; GCN-LABEL: {{^}}global_atomic_dec_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
define void @global_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %val = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42)
  store i64 %val, i64 addrspace(1)* %out
  ret void
}
302
; Returning 64-bit atomic dec through an addrspace(1) pointer advanced by four
; i64 elements; the check expects offset:32 followed by glc on the x2 buffer
; atomic.
; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
define void @global_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %elt = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
  %val = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %elt, i64 42)
  store i64 %val, i64 addrspace(1)* %out
  ret void
}
313
; 64-bit atomic dec through an addrspace(1) pointer whose result is unused;
; the check expects an x2 buffer atomic without glc.
;
; The label uses the GCN prefix because no RUN line enables a FUNC prefix, so
; a FUNC label would never be checked.
; GCN-LABEL: {{^}}global_atomic_dec_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
define void @global_atomic_dec_noret_i64(i64 addrspace(1)* %ptr) nounwind {
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42)
  ret void
}
322
; Non-returning 64-bit atomic dec through an addrspace(1) pointer advanced by
; four i64 elements; the check expects offset:32 and no glc on the x2 buffer
; atomic.
;
; The label uses the GCN prefix because no RUN line enables a FUNC prefix, so
; a FUNC label would never be checked.
; GCN-LABEL: {{^}}global_atomic_dec_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
define void @global_atomic_dec_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
  ret void
}
332
; Returning 64-bit atomic dec on an addrspace(1) address that depends on the
; work-item id plus a constant five-element offset. The bonaire run expects
; an x2 buffer atomic in addr64 form with offset:40; the tonga run expects an
; x2 flat atomic with no offset operand.
; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset_addr64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
define void @global_atomic_dec_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.ptr = getelementptr i64, i64 addrspace(1)* %ptr, i32 %tid
  %lane.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %elt = getelementptr i64, i64 addrspace(1)* %lane.ptr, i32 5
  %val = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %elt, i64 42)
  store i64 %val, i64 addrspace(1)* %lane.out
  ret void
}
347
; Non-returning 64-bit atomic dec on an addrspace(1) address that depends on
; the work-item id plus a constant five-element offset. The bonaire run
; expects an x2 buffer atomic in addr64 form with offset:40 and no glc; the
; tonga run expects an x2 flat atomic with no trailing operands.
; GCN-LABEL: {{^}}global_atomic_dec_noret_i64_offset_addr64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define void @global_atomic_dec_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.ptr = getelementptr i64, i64 addrspace(1)* %ptr, i32 %tid
  %elt = getelementptr i64, i64 addrspace(1)* %lane.ptr, i32 5
  %unused = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %elt, i64 42)
  ret void
}
360
; addrspace(3) array of 512 i64 elements, 8-byte aligned, indexed by the test
; below.
@lds1 = addrspace(3) global [512 x i64] undef, align 8

; 64-bit atomic dec on element (workitem id + 2) of @lds1, where the
; incremented index has a second use (it is stored to %add_use). The checks
; expect the address to be formed by shifting the work-item id left by 3,
; with the constant +2 elements appearing as offset:16 on the DS instruction.
; GCN-LABEL: {{^}}atomic_dec_shl_base_lds_0_i64:
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}}
; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16
define void @atomic_dec_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %slot = add nsw i32 %tid, 2
  %slot.ptr = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %slot
  %val = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %slot.ptr, i64 9)
  ; Second use of the index value.
  store i32 %slot, i32 addrspace(1)* %add_use
  store i64 %val, i64 addrspace(1)* %out
  ret void
}
375
; #0 is attached to kernel definitions in this file.
attributes #0 = { nounwind }
; #1 is attached to the workitem id declaration and its tail-call sites.
attributes #1 = { readnone nounwind }
; #2 is attached to the atomic dec intrinsic declarations.
attributes #2 = { argmemonly nounwind }
379
380
381
382
383
384
385
386
387
388