1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
2
; Kill with the constant operand 0.0.
; Expected output: the entry block contains only s_endpgm, i.e. no
; instruction at all is emitted for the kill call.
3; CHECK-LABEL: {{^}}test_kill_depth_0_imm_pos:
4; CHECK-NEXT: ; BB#0:
5; CHECK-NEXT: s_endpgm
6define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 {
7  call void @llvm.AMDGPU.kill(float 0.0)
8  ret void
9}
10
; Kill with the constant operand -0.0.
; Expected output: the entry block writes 0 to exec (s_mov_b64 exec, 0),
; and s_endpgm follows in a separate block (BB#1).
11; CHECK-LABEL: {{^}}test_kill_depth_0_imm_neg:
12; CHECK-NEXT: ; BB#0:
13; CHECK-NEXT: s_mov_b64 exec, 0
14; CHECK-NEXT: ; BB#1:
15; CHECK-NEXT: s_endpgm
16define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 {
17  call void @llvm.AMDGPU.kill(float -0.0)
18  ret void
19}
20
; Two back-to-back kills with constant operands (-0.0, then -1.0).
; Expected output: each kill produces its own "s_mov_b64 exec, 0", each in
; its own block (BB#0 and BB#1), followed by s_endpgm in BB#2. The FIXME
; below records that the second exec write is redundant.
21; FIXME: Ideally only one would be emitted
22; CHECK-LABEL: {{^}}test_kill_depth_0_imm_neg_x2:
23; CHECK-NEXT: ; BB#0:
24; CHECK-NEXT: s_mov_b64 exec, 0
25; CHECK-NEXT: ; BB#1:
26; CHECK-NEXT: s_mov_b64 exec, 0
27; CHECK-NEXT: ; BB#2:
28; CHECK-NEXT: s_endpgm
29define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 {
30  call void @llvm.AMDGPU.kill(float -0.0)
31  call void @llvm.AMDGPU.kill(float -1.0)
32  ret void
33}
34
; Kill with a non-constant operand (the function argument %x).
; Expected output: a single "v_cmpx_le_f32_e32 vcc, 0, v0" in the entry
; block, followed by s_endpgm in a separate block (BB#1).
35; CHECK-LABEL: {{^}}test_kill_depth_var:
36; CHECK-NEXT: ; BB#0:
37; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
38; CHECK-NEXT: ; BB#1:
39; CHECK-NEXT: s_endpgm
40define amdgpu_ps void @test_kill_depth_var(float %x) #0 {
41  call void @llvm.AMDGPU.kill(float %x)
42  ret void
43}
44
; Two back-to-back kills on the same non-constant operand %x.
; Expected output: the identical compare "v_cmpx_le_f32_e32 vcc, 0, v0" is
; emitted twice, once in BB#0 and once in BB#1, before s_endpgm in BB#2.
; The FIXME below records that the duplicate is not yet eliminated.
45; FIXME: Ideally only one would be emitted
46; CHECK-LABEL: {{^}}test_kill_depth_var_x2_same:
47; CHECK-NEXT: ; BB#0:
48; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
49; CHECK-NEXT: ; BB#1:
50; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
51; CHECK-NEXT: ; BB#2:
52; CHECK-NEXT: s_endpgm
53define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 {
54  call void @llvm.AMDGPU.kill(float %x)
55  call void @llvm.AMDGPU.kill(float %x)
56  ret void
57}
58
; Two back-to-back kills on different non-constant operands (%x, then %y).
; Expected output: one v_cmpx_le_f32_e32 per kill, against v0 in BB#0 and
; against v1 in BB#1, followed by s_endpgm in BB#2.
59; CHECK-LABEL: {{^}}test_kill_depth_var_x2:
60; CHECK-NEXT: ; BB#0:
61; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
62; CHECK-NEXT: ; BB#1:
63; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v1
64; CHECK-NEXT: ; BB#2:
65; CHECK-NEXT: s_endpgm
66define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
67  call void @llvm.AMDGPU.kill(float %x)
68  call void @llvm.AMDGPU.kill(float %y)
69  ret void
70}
71
; Two kills separated by another instruction: the second operand is
; produced by inline asm that writes v7.
; Expected output: the first compare (against v0) sits in BB#0; BB#1 holds
; the inline asm followed by the second compare (against v7); s_endpgm is
; in BB#2. The two expectations inside BB#1 are deliberately not of the
; "-NEXT" kind, so other output lines may surround the inline asm text.
72; CHECK-LABEL: {{^}}test_kill_depth_var_x2_instructions:
73; CHECK-NEXT: ; BB#0:
74; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
75; CHECK-NEXT: ; BB#1:
76; CHECK: v_mov_b32_e64 v7, -1
77; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
78; CHECK-NEXT: ; BB#2:
79; CHECK-NEXT: s_endpgm
80define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
81  call void @llvm.AMDGPU.kill(float %x)
  ; The "={VGPR7}" constraint pins the asm result (and so the second kill's
  ; operand) to v7, which is the register the expectations above name.
82  %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={VGPR7}"()
83  call void @llvm.AMDGPU.kill(float %y)
84  ret void
85}
86
; Kill inside a conditionally executed block.
; %arg is an inreg argument and the expectations open with a scalar
; compare and branch (s_cmp_lg_i32 / s_cbranch_scc1). Block %bb runs an
; inline asm sequence (one v_mov_b32_e64 plus ten v_nop_e64) and then kills
; on its result in v7.
; Expected output after the v_cmpx_le_f32_e32: an s_cbranch_execnz to a
; split-off block that just ends the program, with a fall-through block
; (BB#3) that issues an exp instruction and s_endpgm.
; NOTE(review): RETURN_BB is captured below but never referenced again in
; this test's expectations.
87; FIXME: why does the skip depend on the asm length in the same block?
88
89; CHECK-LABEL: {{^}}test_kill_control_flow:
90; CHECK: s_cmp_lg_i32 s{{[0-9]+}}, 0
91; CHECK: s_cbranch_scc1 [[RETURN_BB:BB[0-9]+_[0-9]+]]
92
93; CHECK-NEXT: ; BB#1:
94; CHECK: v_mov_b32_e64 v7, -1
95; CHECK: v_nop_e64
96; CHECK: v_nop_e64
97; CHECK: v_nop_e64
98; CHECK: v_nop_e64
99; CHECK: v_nop_e64
100; CHECK: v_nop_e64
101; CHECK: v_nop_e64
102; CHECK: v_nop_e64
103; CHECK: v_nop_e64
104; CHECK: v_nop_e64
105
106; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
107; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]]
108; CHECK-NEXT: ; BB#3:
109; CHECK-NEXT: exp 0, 9, 0, 1, 1, v0, v0, v0, v0
110; CHECK-NEXT: s_endpgm
111
112; CHECK-NEXT: {{^}}[[SPLIT_BB]]:
113; CHECK-NEXT: s_endpgm
114define amdgpu_ps void @test_kill_control_flow(i32 inreg %arg) #0 {
115entry:
116  %cmp = icmp eq i32 %arg, 0
117  br i1 %cmp, label %bb, label %exit
118
119bb:
  ; The length of this asm body matters to the test (see the FIXME above);
  ; do not shorten or reformat the string.
120  %var = call float asm sideeffect "
121    v_mov_b32_e64 v7, -1
122    v_nop_e64
123    v_nop_e64
124    v_nop_e64
125    v_nop_e64
126    v_nop_e64
127    v_nop_e64
128    v_nop_e64
129    v_nop_e64
130    v_nop_e64
131    v_nop_e64", "={VGPR7}"()
132  call void @llvm.AMDGPU.kill(float %var)
133  br label %exit
134
135exit:
136  ret void
137}
138
; Kill inside a conditionally executed block, with more code after it.
; Same shape as a conditional kill on an inreg condition, but here:
;   - %live.across (v8) is defined before the kill and stored after it;
;   - %live.out (v9) is defined after the kill and reaches %exit via a phi.
; Expected output: after the v_cmpx_le_f32_e32 an s_cbranch_execnz jumps to
; a split-off block; the fall-through block (BB#4) issues exp + s_endpgm.
; The split-off block holds the code that followed the kill (the store of
; v8 and the asm defining v9), and a later block stores v9 and ends the
; program.
; The asm body has eleven v_nop_e64 lines while only eight are listed in
; the expectations; they are not of the "-NEXT" kind, so the remaining
; nops are simply skipped over.
139; CHECK-LABEL: {{^}}test_kill_control_flow_remainder:
140; CHECK: s_cmp_lg_i32 s{{[0-9]+}}, 0
141; CHECK-NEXT: s_cbranch_scc1 [[RETURN_BB:BB[0-9]+_[0-9]+]]
142
143; CHECK-NEXT: ; BB#1: ; %bb
144; CHECK: v_mov_b32_e64 v7, -1
145; CHECK: v_nop_e64
146; CHECK: v_nop_e64
147; CHECK: v_nop_e64
148; CHECK: v_nop_e64
149; CHECK: v_nop_e64
150; CHECK: v_nop_e64
151; CHECK: v_nop_e64
152; CHECK: v_nop_e64
153; CHECK: ;;#ASMEND
154; CHECK: v_mov_b32_e64 v8, -1
155; CHECK: ;;#ASMEND
156; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
157; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]]
158
159; CHECK-NEXT: ; BB#4:
160; CHECK-NEXT: exp 0, 9, 0, 1, 1, v0, v0, v0, v0
161; CHECK-NEXT: s_endpgm
162
163; CHECK-NEXT: {{^}}[[SPLIT_BB]]:
164; CHECK: buffer_store_dword v8
165; CHECK: v_mov_b32_e64 v9, -2
166
167; CHECK: {{^}}BB{{[0-9]+_[0-9]+}}:
168; CHECK: buffer_store_dword v9
169; CHECK-NEXT: s_endpgm
170define amdgpu_ps void @test_kill_control_flow_remainder(i32 inreg %arg) #0 {
171entry:
172  %cmp = icmp eq i32 %arg, 0
173  br i1 %cmp, label %bb, label %exit
174
175bb:
176  %var = call float asm sideeffect "
177    v_mov_b32_e64 v7, -1
178    v_nop_e64
179    v_nop_e64
180    v_nop_e64
181    v_nop_e64
182    v_nop_e64
183    v_nop_e64
184    v_nop_e64
185    v_nop_e64
186    v_nop_e64
187    v_nop_e64
188    v_nop_e64", "={VGPR7}"()
  ; Defined before the kill and stored after it: live across the kill.
189  %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={VGPR8}"()
190  call void @llvm.AMDGPU.kill(float %var)
191  store volatile float %live.across, float addrspace(1)* undef
  ; Defined after the kill and consumed by the phi in %exit.
192  %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={VGPR9}"()
193  br label %exit
194
195exit:
  ; 0.0 when %bb was skipped, otherwise the value produced after the kill.
196  %phi = phi float [ 0.0, %entry ], [ %live.out, %bb ]
197  store float %phi, float addrspace(1)* undef
198  ret void
199}
200
; Kill inside a loop that is entered under a per-lane condition.
; Unlike an inreg argument, %arg here is a plain argument and the
; expectations open with a vector compare on v0 followed by exec-mask
; save/xor (s_and_saveexec_b64 / s_xor_b64) and an s_cbranch_execz to the
; exit block.
; The loop body runs inline asm (result in v7), kills on it, then does a
; volatile load; the loop repeats while the loaded value equals 0.
; Expected output: the line right after the v_cmpx_le_f32_e32 is the next
; block header (BB#3), i.e. no skip branch is expected between the kill
; compare and the rest of the loop body. The exit block restores exec with
; s_or_b64 before the final store and s_endpgm.
201; CHECK-LABEL: {{^}}test_kill_divergent_loop:
202; CHECK: v_cmp_eq_i32_e32 vcc, 0, v0
203; CHECK-NEXT: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], vcc
204; CHECK-NEXT: s_xor_b64 [[SAVEEXEC]], exec, [[SAVEEXEC]]
205; CHECK-NEXT: s_cbranch_execz [[EXIT:BB[0-9]+_[0-9]+]]
206; CHECK-NEXT: ; mask branch [[EXIT]]
207
208; CHECK: [[LOOP_BB:BB[0-9]+_[0-9]+]]:
209
210; CHECK: v_mov_b32_e64 v7, -1
211; CHECK: v_nop_e64
212; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
213
214; CHECK-NEXT: ; BB#3:
215; CHECK: buffer_load_dword [[LOAD:v[0-9]+]]
216; CHECK: v_cmp_eq_i32_e32 vcc, 0, [[LOAD]]
217; CHECK-NEXT: s_and_b64 vcc, exec, vcc
218; CHECK-NEXT: s_cbranch_vccnz [[LOOP_BB]]
219
220; CHECK-NEXT: {{^}}[[EXIT]]:
221; CHECK: s_or_b64 exec, exec, [[SAVEEXEC]]
222; CHECK: buffer_store_dword
223; CHECK: s_endpgm
224define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
225entry:
226  %cmp = icmp eq i32 %arg, 0
227  br i1 %cmp, label %bb, label %exit
228
229bb:
230  %var = call float asm sideeffect "
231    v_mov_b32_e64 v7, -1
232    v_nop_e64
233    v_nop_e64
234    v_nop_e64
235    v_nop_e64
236    v_nop_e64
237    v_nop_e64
238    v_nop_e64
239    v_nop_e64
240    v_nop_e64
241    v_nop_e64", "={VGPR7}"()
242  call void @llvm.AMDGPU.kill(float %var)
  ; Volatile load of an undef address: supplies a loop condition that the
  ; optimizer cannot fold away.
243  %vgpr = load volatile i32, i32 addrspace(1)* undef
244  %loop.cond = icmp eq i32 %vgpr, 0
  ; %bb is its own successor: loop again while the loaded value is 0.
245  br i1 %loop.cond, label %bb, label %exit
246
247exit:
248  store volatile i32 8, i32 addrspace(1)* undef
249  ret void
250}
251
252
; The kill intrinsic exercised by every test in this file; it takes a
; single float operand and returns nothing.
253declare void @llvm.AMDGPU.kill(float) #0
254
; Attribute group shared by the intrinsic declaration and all test
; functions.
255attributes #0 = { nounwind }
256