From 1a05d247fa7a220f415a8f7b44ebdc3006f1c88e Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Fri, 4 Aug 2017 01:09:43 +0000 Subject: [PATCH] [AMDGPU] Add missing hazard for DPP-after-EXEC-write Summary: Following the docs, we need at least 5 wait states between an EXEC write and an instruction that uses DPP. Reviewers: tstellar, arsenm Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D34849 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@310013 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 9 ++++- test/CodeGen/AMDGPU/inserted-wait-states.mir | 35 ++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index cd9e7fb04f1..b601cfeded1 100644 --- a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -367,10 +367,13 @@ int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) { int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) { const SIRegisterInfo *TRI = ST.getRegisterInfo(); + const SIInstrInfo *TII = ST.getInstrInfo(); - // Check for DPP VGPR read after VALU VGPR write. + // Check for DPP VGPR read after VALU VGPR write and EXEC write. int DppVgprWaitStates = 2; + int DppExecWaitStates = 5; int WaitStatesNeeded = 0; + auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); }; for (const MachineOperand &Use : DPP->uses()) { if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg())) @@ -380,6 +383,10 @@ int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) { WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); } + WaitStatesNeeded = std::max( + WaitStatesNeeded, + DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn)); + return WaitStatesNeeded; } diff --git a/test/CodeGen/AMDGPU/inserted-wait-states.mir b/test/CodeGen/AMDGPU/inserted-wait-states.mir index c6fe6debd22..bf4c7cbe6ee 100644 --- a/test/CodeGen/AMDGPU/inserted-wait-states.mir +++ b/test/CodeGen/AMDGPU/inserted-wait-states.mir @@ -13,6 +13,7 @@ define amdgpu_kernel void @s_mov_fed_b32() { ret void } define amdgpu_kernel void @s_movrel() { ret void } define amdgpu_kernel void @v_interp() { ret void } + define amdgpu_kernel void @dpp() { ret void } define amdgpu_kernel void @mov_fed_hazard_crash_on_dbg_value(i32 addrspace(1)* %A) { entry: @@ -477,6 +478,40 @@ body: | %vgpr0 = V_INTERP_MOV_F32 0, 0, 0, implicit %m0, implicit %exec S_ENDPGM ... + +... +--- + +# GCN-LABEL: name: dpp + +# VI-LABEL: bb.0: +# VI: V_MOV_B32_e32 +# VI-NEXT: S_NOP 0 +# VI-NEXT: S_NOP 0 +# VI-NEXT: V_MOV_B32_dpp + +# VI-LABEL: bb.1: +# VI: V_CMPX_EQ_I32_e32 +# VI-NEXT: S_NOP 0 +# VI-NEXT: S_NOP 0 +# VI-NEXT: S_NOP 0 +# VI-NEXT: S_NOP 0 +# VI-NEXT: S_NOP 0 +# VI-NEXT: V_MOV_B32_dpp + +name: dpp + +body: | + bb.0: + %vgpr0 = V_MOV_B32_e32 0, implicit %exec + %vgpr1 = V_MOV_B32_dpp %vgpr0, 0, 15, 15, 0, implicit %exec + S_BRANCH %bb.1 + + bb.1: + implicit %exec, implicit %vcc = V_CMPX_EQ_I32_e32 %vgpr0, %vgpr1, implicit %exec + %vgpr3 = V_MOV_B32_dpp %vgpr0, 0, 15, 15, 0, implicit %exec + S_ENDPGM +... --- name: mov_fed_hazard_crash_on_dbg_value alignment: 0 -- 2.50.1