From f577de357a6388fb536db3fe99c965f6416eea80 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 7 Nov 2016 19:09:33 +0000 Subject: [PATCH] AMDGPU: Remove unnecessary and on conditional branch The comment explaining why this was necessary is incorrect in its description of v_cmp's behavior for inactive workitems. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@286134 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 18 ++---------------- test/CodeGen/AMDGPU/branch-relaxation.ll | 15 +++++++-------- test/CodeGen/AMDGPU/cf-loop-on-constant.ll | 1 - test/CodeGen/AMDGPU/cgp-bitfield-extract.ll | 3 +-- test/CodeGen/AMDGPU/i1-copy-implicit-def.ll | 3 +-- test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll | 1 - test/CodeGen/AMDGPU/skip-if-dead.ll | 10 +++------- test/CodeGen/AMDGPU/smrd-vccz-bug.ll | 4 +--- test/CodeGen/AMDGPU/uniform-cfg.ll | 8 +------- test/CodeGen/AMDGPU/wqm.ll | 5 ++--- 10 files changed, 18 insertions(+), 50 deletions(-) diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 2b3b3fd28b4..4d55a663d48 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1407,26 +1407,12 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) { return; } - // The result of VOPC instructions is or'd against ~EXEC before it is - // written to vcc or another SGPR. This means that the value '1' is always - // written to the corresponding bit for results that are masked. In order - // to correctly check against vccz, we need to and VCC with the EXEC - // register in order to clear the value from the masked bits. - SDLoc SL(N); - SDNode *MaskedCond = - CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1, - CurDAG->getRegister(AMDGPU::EXEC, MVT::i1), - Cond); - SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC, - SDValue(MaskedCond, 0), - SDValue()); // Passing SDValue() adds a - // glue output. + SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC, Cond); CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other, N->getOperand(2), // Basic Block - VCC.getValue(0), // Chain - VCC.getValue(1)); // Glue + VCC.getValue(0)); return; } diff --git a/test/CodeGen/AMDGPU/branch-relaxation.ll b/test/CodeGen/AMDGPU/branch-relaxation.ll index 5a703f8acfb..c298911504a 100644 --- a/test/CodeGen/AMDGPU/branch-relaxation.ll +++ b/test/CodeGen/AMDGPU/branch-relaxation.ll @@ -89,10 +89,9 @@ bb3: ; GCN-LABEL: {{^}}uniform_conditional_min_long_forward_vcnd_branch: ; GCN: s_load_dword [[CND:s[0-9]+]] -; GCN-DAG: v_cmp_eq_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[CND]], 0 ; GCN-DAG: v_mov_b32_e32 [[V_CND:v[0-9]+]], [[CND]] -; GCN: s_and_b64 vcc, exec, [[CMP]] -; GCN-NEXT: s_cbranch_vccz [[LONGBB:BB[0-9]+_[0-9]+]] +; GCN-DAG: v_cmp_eq_f32_e64 vcc, [[CND]], 0 +; GCN: s_cbranch_vccz [[LONGBB:BB[0-9]+_[0-9]+]] ; GCN-NEXT: [[LONG_JUMP:BB[0-9]+_[0-9]+]]: ; %bb0 ; GCN-NEXT: s_getpc_b64 vcc @@ -434,7 +433,7 @@ endif: ; GCN: v_nop_e64 ; GCN: v_nop_e64 ; GCN: ;;#ASMEND -; GCN-NEXT: s_and_b64 vcc, exec, -1{{$}} +; GCN-NEXT: s_mov_b64 vcc, -1{{$}} ; GCN-NEXT: s_cbranch_vccz [[RET]] ; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %loop_body @@ -478,14 +477,13 @@ ret: ; GCN-LABEL: {{^}}long_branch_hang: ; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 6 ; GCN-NEXT: s_cbranch_scc1 [[LONG_BR_0:BB[0-9]+_[0-9]+]] - -; GCN: s_add_u32 vcc_lo, vcc_lo, [[LONG_BR_DEST0:BB[0-9]+_[0-9]+]]-( -; GCN: s_setpc_b64 +; GCN-NEXT: s_branch [[SHORTB:BB[0-9]+_[0-9]+]] ; GCN-NEXT: [[LONG_BR_0]]: +; GCN: s_add_u32 vcc_lo, vcc_lo, [[LONG_BR_DEST0:BB[0-9]+_[0-9]+]]-( ; GCN: s_setpc_b64 -; GCN-NEXT: [[LONG_BR_DEST0]]: +; GCN: [[SHORTB]]: ; GCN-DAG: v_cmp_lt_i32 ; GCN-DAG: v_cmp_gt_i32 ; GCN: s_cbranch_vccnz @@ -493,6 +491,7 @@ ret: ; GCN: s_setpc_b64 ; GCN: s_setpc_b64 +; GCN: [[LONG_BR_DEST0]] ; GCN: s_cmp_eq_u32 ; GCN-NEXT: s_cbranch_scc0 ; GCN: s_setpc_b64 diff --git a/test/CodeGen/AMDGPU/cf-loop-on-constant.ll b/test/CodeGen/AMDGPU/cf-loop-on-constant.ll index a68310c6083..b112c49b69e 100644 --- a/test/CodeGen/AMDGPU/cf-loop-on-constant.ll +++ b/test/CodeGen/AMDGPU/cf-loop-on-constant.ll @@ -97,7 +97,6 @@ for.body: ; GCN: v_and_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}} ; GCN: v_cmp_eq_u32_e32 vcc, 1, -; GCN: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, exec, vcc ; GCN: [[LOOPBB:BB[0-9]+_[0-9]+]] ; GCN: s_cbranch_vccnz [[LOOPBB]] ; GCN-NEXT: ; BB#2 diff --git a/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll b/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll index 33daf0292ae..694c67fc5da 100644 --- a/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll +++ b/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll @@ -170,13 +170,12 @@ ret: ; OPT: ret ; GCN-LABEL: {{^}}sink_ubfe_i64_span_midpoint: +; GCN: s_lshr_b64 s{{\[}}[[LO:[0-9]+]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, 30 ; GCN: s_cbranch_vccnz BB3_2 -; GCN: s_lshr_b64 s{{\[}}[[LO:[0-9]+]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, 30 ; GCN: s_and_b32 s{{[0-9]+}}, s[[LO]], 0xff ; GCN: BB3_2: -; GCN: s_lshr_b64 s{{\[}}[[LO:[0-9]+]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, 30 ; GCN: s_and_b32 s{{[0-9]+}}, s[[LO]], 0x7f ; GCN: BB3_3: diff --git a/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll b/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll index d6309a2dd5d..5a2130006ab 100644 --- a/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll +++ b/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll @@ -1,10 +1,9 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SILowerI1Copies was not handling IMPLICIT_DEF ; SI-LABEL: {{^}}br_implicit_def: ; SI: BB#0: -; SI-NEXT: s_and_b64 vcc, exec ; SI-NEXT: s_cbranch_vccnz define void @br_implicit_def(i32 addrspace(1)* %out, i32 %arg) #0 { bb: diff --git a/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll b/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll index 1ffb2f839b1..c61f2b0744d 100644 --- a/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll +++ b/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll @@ -37,7 +37,6 @@ bb5: ; preds = %bb3, %bb1 ; OPT-NOT: call i1 @llvm.amdgcn.loop ; GCN-LABEL: {{^}}annotate_ret_noloop: -; GCN: s_and_b64 vcc ; GCN: s_cbranch_vccnz ; GCN: s_endpgm ; GCN: .Lfunc_end1 diff --git a/test/CodeGen/AMDGPU/skip-if-dead.ll b/test/CodeGen/AMDGPU/skip-if-dead.ll index 8c8f724e54a..3ab0ee15e3d 100644 --- a/test/CodeGen/AMDGPU/skip-if-dead.ll +++ b/test/CodeGen/AMDGPU/skip-if-dead.ll @@ -262,13 +262,11 @@ exit: ; CHECK-NEXT: s_endpgm ; CHECK: [[KILLBB:BB[0-9]+_[0-9]+]]: -; CHECK: s_and_b64 vcc, exec, ; CHECK-NEXT: s_cbranch_vccz [[PHIBB:BB[0-9]+_[0-9]+]] ; CHECK: [[PHIBB]]: ; CHECK: v_cmp_eq_f32_e32 vcc, 0, [[PHIREG]] -; CHECK: s_and_b64 vcc, exec, vcc -; CHECK: s_cbranch_vccz [[ENDBB:BB[0-9]+_[0-9]+]] +; CHECK-NEXT: s_cbranch_vccz [[ENDBB:BB[0-9]+_[0-9]+]] ; CHECK: ; %bb10 ; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 9 @@ -303,16 +301,14 @@ end: ; CHECK-LABEL: {{^}}no_skip_no_successors: ; CHECK: v_cmp_nge_f32 -; CHECK: s_and_b64 vcc, exec, -; CHECK: s_cbranch_vccz [[SKIPKILL:BB[0-9]+_[0-9]+]] +; CHECK-NEXT: s_cbranch_vccz [[SKIPKILL:BB[0-9]+_[0-9]+]] ; CHECK: ; %bb6 ; CHECK: s_mov_b64 exec, 0 ; CHECK: [[SKIPKILL]]: ; CHECK: v_cmp_nge_f32 -; CHECK: s_and_b64 vcc, exec, vcc -; CHECK: s_cbranch_vccz [[UNREACHABLE:BB[0-9]+_[0-9]+]] +; CHECK-NEXT: s_cbranch_vccz [[UNREACHABLE:BB[0-9]+_[0-9]+]] ; CHECK: [[UNREACHABLE]]: ; CHECK-NEXT: .Lfunc_end{{[0-9]+}} diff --git a/test/CodeGen/AMDGPU/smrd-vccz-bug.ll b/test/CodeGen/AMDGPU/smrd-vccz-bug.ll index 966121a48cf..f82ac041969 100644 --- a/test/CodeGen/AMDGPU/smrd-vccz-bug.ll +++ b/test/CodeGen/AMDGPU/smrd-vccz-bug.ll @@ -4,8 +4,7 @@ ; GCN-FUNC: {{^}}vccz_workaround: ; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x0 -; GCN: v_cmp_neq_f32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}} -; GCN: s_and_b64 vcc, exec, [[MASK]] +; GCN: v_cmp_neq_f32_e64 vcc, s{{[0-9]+}}, 0{{$}} ; GCN: s_waitcnt lgkmcnt(0) ; VCCZ-BUG: s_mov_b64 vcc, vcc ; NOVCCZ-BUG-NOT: s_mov_b64 vcc, vcc @@ -29,7 +28,6 @@ endif: ; GCN-FUNC: {{^}}vccz_noworkaround: ; GCN: v_cmp_neq_f32_e32 vcc, 0, v{{[0-9]+}} -; GCN: s_and_b64 vcc, exec, vcc ; GCN: s_cbranch_vccnz [[EXIT:[0-9A-Za-z_]+]] ; GCN: buffer_store_dword ; GCN: [[EXIT]]: diff --git a/test/CodeGen/AMDGPU/uniform-cfg.ll b/test/CodeGen/AMDGPU/uniform-cfg.ll index a3033b6be87..a462f148847 100644 --- a/test/CodeGen/AMDGPU/uniform-cfg.ll +++ b/test/CodeGen/AMDGPU/uniform-cfg.ll @@ -32,7 +32,6 @@ done: ; FIXME: We could use _e32 here if we re-used the 0 from [[STORE_VAL]], and ; also scheduled the write first. ; GCN-DAG: v_cmp_eq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}} -; GCN-DAG: s_and_b64 vcc, exec, [[COND]] ; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0 ; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]] @@ -89,7 +88,6 @@ done: ; FIXME: We could use _e32 here if we re-used the 0 from [[STORE_VAL]], and ; also scheduled the write first. ; GCN-DAG: v_cmp_neq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}} -; GCN-DAG: s_and_b64 vcc, exec, [[COND]] ; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0 ; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]] @@ -253,8 +251,7 @@ ENDIF: ; preds = %IF, %main_body ; GCN: s_load_dword [[COND:s[0-9]+]] ; GCN: s_cmp_lt_i32 [[COND]], 1 ; GCN: s_cbranch_scc1 [[EXIT:[A-Za-z0-9_]+]] -; GCN: v_cmp_gt_i32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], [[COND]], 0{{$}} -; GCN: s_and_b64 vcc, exec, [[MASK]] +; GCN: v_cmp_gt_i32_e64 vcc, [[COND]], 0{{$}} ; GCN: s_cbranch_vccnz [[EXIT]] ; GCN: buffer_store ; GCN: {{^}}[[EXIT]]: @@ -439,7 +436,6 @@ bb9: ; preds = %bb8, %bb4 ; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0 ; SI: v_cmp_eq_u64_e64 -; SI: s_and_b64 vcc, exec, ; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]] ; VI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]] @@ -471,7 +467,6 @@ done: ; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0 ; SI: v_cmp_ne_u64_e64 -; SI: s_and_b64 vcc, exec, ; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]] ; VI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]] @@ -500,7 +495,6 @@ done: ; GCN-LABEL: {{^}}uniform_if_scc_i64_sgt: ; GCN: v_cmp_gt_i64_e64 -; GCN: s_and_b64 vcc, exec, ; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]] ; Fall-through to the else diff --git a/test/CodeGen/AMDGPU/wqm.ll b/test/CodeGen/AMDGPU/wqm.ll index 14c279b3b95..4def7357efe 100644 --- a/test/CodeGen/AMDGPU/wqm.ll +++ b/test/CodeGen/AMDGPU/wqm.ll @@ -355,12 +355,11 @@ main_body: ; CHECK: s_wqm_b64 exec, exec ; CHECK-DAG: v_mov_b32_e32 [[CTR:v[0-9]+]], 0 ; CHECK-DAG: v_mov_b32_e32 [[SEVEN:v[0-9]+]], 0x40e00000 -; CHECK: s_branch [[LOOPHDR:BB[0-9]+_[0-9]+]] +; CHECK: [[LOOPHDR:BB[0-9]+_[0-9]+]]: ; %body ; CHECK: v_add_f32_e32 [[CTR]], 2.0, [[CTR]] -; CHECK: [[LOOPHDR]]: ; %loop ; CHECK: v_cmp_lt_f32_e32 vcc, [[SEVEN]], [[CTR]] -; CHECK: s_cbranch_vccz +; CHECK: s_cbranch_vccz [[LOOPHDR]] ; CHECK: ; %break ; CHECK: ; return -- 2.40.0