; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s

; Tests for codegen of llvm.AMDGPU.kill on amdgcn: depending on the
; operand, the kill should fold away entirely, clear EXEC directly, or
; emit a v_cmpx to update EXEC — optionally followed by a branch that
; skips the rest of the program when all lanes are dead.
; NOTE(review): the exact block numbers (BB#1, BB#3, ...) and the
; "exp ... done" + s_endpgm "skip" tail asserted below are tied to this
; llc version's block layout — confirm against current output if updating.

; Known non-negative immediate: the kill can never fire, so no code at
; all is expected for it.
; CHECK-LABEL: {{^}}test_kill_depth_0_imm_pos:
; CHECK-NEXT: ; BB#0:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 {
  call void @llvm.AMDGPU.kill(float 0.0)
  ret void
}

; Known negative immediate: kills unconditionally, so EXEC is simply
; cleared with a move rather than a compare.
; CHECK-LABEL: {{^}}test_kill_depth_0_imm_neg:
; CHECK-NEXT: ; BB#0:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: ; BB#1:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 {
  call void @llvm.AMDGPU.kill(float -0.0)
  ret void
}

; Two unconditional kills back-to-back currently produce two EXEC
; clears; the second is redundant.
; FIXME: Ideally only one would be emitted
; CHECK-LABEL: {{^}}test_kill_depth_0_imm_neg_x2:
; CHECK-NEXT: ; BB#0:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: ; BB#1:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: ; BB#2:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 {
  call void @llvm.AMDGPU.kill(float -0.0)
  call void @llvm.AMDGPU.kill(float -1.0)
  ret void
}

; Variable operand: a single v_cmpx updates EXEC; no skip branch is
; needed since nothing follows before the end of the program.
; CHECK-LABEL: {{^}}test_kill_depth_var:
; CHECK-NEXT: ; BB#0:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
; CHECK-NEXT: ; BB#1:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var(float %x) #0 {
  call void @llvm.AMDGPU.kill(float %x)
  ret void
}

; Two kills of the same value still emit two compares.
; FIXME: Ideally only one would be emitted
; CHECK-LABEL: {{^}}test_kill_depth_var_x2_same:
; CHECK-NEXT: ; BB#0:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
; CHECK-NEXT: ; BB#1:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
; CHECK-NEXT: ; BB#2:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 {
  call void @llvm.AMDGPU.kill(float %x)
  call void @llvm.AMDGPU.kill(float %x)
  ret void
}

; Two kills of distinct values: one compare per operand.
; CHECK-LABEL: {{^}}test_kill_depth_var_x2:
; CHECK-NEXT: ; BB#0:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
; CHECK-NEXT: ; BB#1:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v1
; CHECK-NEXT: ; BB#2:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
  call void @llvm.AMDGPU.kill(float %x)
  call void @llvm.AMDGPU.kill(float %y)
  ret void
}

; A kill of an inline-asm result: the compare reads the asm's output
; register (v7) after the asm block.
; CHECK-LABEL: {{^}}test_kill_depth_var_x2_instructions:
; CHECK-NEXT: ; BB#0:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
; CHECK-NEXT: ; BB#1:
; CHECK: v_mov_b32_e64 v7, -1
; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
; CHECK-NEXT: ; BB#2:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
  call void @llvm.AMDGPU.kill(float %x)
  %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={VGPR7}"()
  call void @llvm.AMDGPU.kill(float %y)
  ret void
}

; Kill inside control flow, with enough instructions (the v_nop_e64
; padding) that a skip branch over the remainder is emitted: when all
; lanes die, a null export + s_endpgm tail is reached instead.
; FIXME: why does the skip depend on the asm length in the same block?

; CHECK-LABEL: {{^}}test_kill_control_flow:
; CHECK: s_cmp_lg_i32 s{{[0-9]+}}, 0
; CHECK: s_cbranch_scc1 [[RETURN_BB:BB[0-9]+_[0-9]+]]

; CHECK-NEXT: ; BB#1:
; CHECK: v_mov_b32_e64 v7, -1
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64

; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]]
; CHECK-NEXT: ; BB#3:
; CHECK-NEXT: exp 0, 9, 0, 1, 1, v0, v0, v0, v0
; CHECK-NEXT: s_endpgm

; CHECK-NEXT: {{^}}[[SPLIT_BB]]:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_control_flow(i32 inreg %arg) #0 {
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "
    v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={VGPR7}"()
  call void @llvm.AMDGPU.kill(float %var)
  br label %exit

exit:
  ret void
}

; As above, but the kill block has live values crossing it (v8 stored
; after the kill, v9 live out into the exit phi), so the block is split
; and the remainder must still execute for surviving lanes.
; CHECK-LABEL: {{^}}test_kill_control_flow_remainder:
; CHECK: s_cmp_lg_i32 s{{[0-9]+}}, 0
; CHECK-NEXT: s_cbranch_scc1 [[RETURN_BB:BB[0-9]+_[0-9]+]]

; CHECK-NEXT: ; BB#1: ; %bb
; CHECK: v_mov_b32_e64 v7, -1
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: ;;#ASMEND
; CHECK: v_mov_b32_e64 v8, -1
; CHECK: ;;#ASMEND
; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]]

; CHECK-NEXT: ; BB#4:
; CHECK-NEXT: exp 0, 9, 0, 1, 1, v0, v0, v0, v0
; CHECK-NEXT: s_endpgm

; CHECK-NEXT: {{^}}[[SPLIT_BB]]:
; CHECK: buffer_store_dword v8
; CHECK: v_mov_b32_e64 v9, -2

; CHECK: {{^}}BB{{[0-9]+_[0-9]+}}:
; CHECK: buffer_store_dword v9
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_control_flow_remainder(i32 inreg %arg) #0 {
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "
    v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={VGPR7}"()
  %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={VGPR8}"()
  call void @llvm.AMDGPU.kill(float %var)
  store volatile float %live.across, float addrspace(1)* undef
  %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={VGPR9}"()
  br label %exit

exit:
  %phi = phi float [ 0.0, %entry ], [ %live.out, %bb ]
  store float %phi, float addrspace(1)* undef
  ret void
}

; Kill inside a divergent loop: the loop is entered under a saved EXEC
; mask, the kill's v_cmpx runs each iteration, and EXEC is restored
; with s_or_b64 at the exit block.
; CHECK-LABEL: {{^}}test_kill_divergent_loop:
; CHECK: v_cmp_eq_i32_e32 vcc, 0, v0
; CHECK-NEXT: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], vcc
; CHECK-NEXT: s_xor_b64 [[SAVEEXEC]], exec, [[SAVEEXEC]]
; CHECK-NEXT: s_cbranch_execz [[EXIT:BB[0-9]+_[0-9]+]]
; CHECK-NEXT: ; mask branch [[EXIT]]

; CHECK: [[LOOP_BB:BB[0-9]+_[0-9]+]]:

; CHECK: v_mov_b32_e64 v7, -1
; CHECK: v_nop_e64
; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7

; CHECK-NEXT: ; BB#3:
; CHECK: buffer_load_dword [[LOAD:v[0-9]+]]
; CHECK: v_cmp_eq_i32_e32 vcc, 0, [[LOAD]]
; CHECK-NEXT: s_and_b64 vcc, exec, vcc
; CHECK-NEXT: s_cbranch_vccnz [[LOOP_BB]]

; CHECK-NEXT: {{^}}[[EXIT]]:
; CHECK: s_or_b64 exec, exec, [[SAVEEXEC]]
; CHECK: buffer_store_dword
; CHECK: s_endpgm
define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "
    v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={VGPR7}"()
  call void @llvm.AMDGPU.kill(float %var)
  %vgpr = load volatile i32, i32 addrspace(1)* undef
  %loop.cond = icmp eq i32 %vgpr, 0
  br i1 %loop.cond, label %bb, label %exit

exit:
  store volatile i32 8, i32 addrspace(1)* undef
  ret void
}


declare void @llvm.AMDGPU.kill(float) #0

attributes #0 = { nounwind }