From aff40a3f52ef2cea7db296a21e516a2586879b66 Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Mon, 20 May 2019 07:20:12 +0000 Subject: [PATCH] [AMDGPU] gfx1010 Avoid SMEM WAR hazard for some s_waitcnt values Summary: Avoid introducing hazard mitigation when lgkmcnt is reduced to 0. Clarify code comments to explain assumptions made for this hazard mitigation. Expand and correct test cases to cover variants of s_waitcnt. Reviewers: nhaehnle, rampitec Subscribers: arsenm, kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62058 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@361124 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 28 +++-- test/CodeGen/AMDGPU/smem-war-hazard.mir | 123 ++++++++++++++++++++-- 2 files changed, 138 insertions(+), 13 deletions(-) diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 3bf969196a2..87a8c06e697 100644 --- a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -901,6 +901,7 @@ bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) { const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); + const AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(ST.getCPU()); const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName); if (!SDST) { for (const auto &MO : MI->implicit_operands()) { @@ -919,22 +920,37 @@ bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) { return SIInstrInfo::isSMRD(*I) && I->readsRegister(SDSTReg, TRI); }; - // This assumes that there will be s_waitcnt lgkmcnt(0) or equivalent - // between any at risk SMEM and any SALU dependent on the SMEM results. 
- auto IsExpiredFn = [TII] (MachineInstr *MI, int) { + auto IsExpiredFn = [TII, IV] (MachineInstr *MI, int) { if (MI) { if (TII->isSALU(*MI)) { - if (TII->isSOPP(*MI)) - return false; switch (MI->getOpcode()) { case AMDGPU::S_SETVSKIP: case AMDGPU::S_VERSION: case AMDGPU::S_WAITCNT_VSCNT: case AMDGPU::S_WAITCNT_VMCNT: case AMDGPU::S_WAITCNT_EXPCNT: - case AMDGPU::S_WAITCNT_LGKMCNT: + // These instructions cannot mitigate the hazard. return false; + case AMDGPU::S_WAITCNT_LGKMCNT: + // Reducing lgkmcnt count to 0 always mitigates the hazard. + return (MI->getOperand(1).getImm() == 0) && + (MI->getOperand(0).getReg() == AMDGPU::SGPR_NULL); + case AMDGPU::S_WAITCNT: { + const int64_t Imm = MI->getOperand(0).getImm(); + AMDGPU::Waitcnt Decoded = AMDGPU::decodeWaitcnt(IV, Imm); + return (Decoded.LgkmCnt == 0); + } default: + // SOPP instructions cannot mitigate the hazard. + if (TII->isSOPP(*MI)) + return false; + // At this point the SALU can be assumed to mitigate the hazard + // because either: + // (a) it is independent of the at risk SMEM (breaking chain), + // or + // (b) it is dependent on the SMEM, in which case an appropriate + // s_waitcnt lgkmcnt _must_ exist between it and the at risk + // SMEM instruction. return true; } } diff --git a/test/CodeGen/AMDGPU/smem-war-hazard.mir b/test/CodeGen/AMDGPU/smem-war-hazard.mir index e53d16a98fa..3c8f6e5e871 100644 --- a/test/CodeGen/AMDGPU/smem-war-hazard.mir +++ b/test/CodeGen/AMDGPU/smem-war-hazard.mir @@ -29,13 +29,13 @@ body: | S_ENDPGM 0 ... -# GCN-LABEL: name: hazard_smem_war_related_clause +# GCN-LABEL: name: hazard_smem_war_dependent_salu # GCN: S_LOAD_DWORD_IMM # GCN-NEXT: S_WAITCNT # GCN-NEXT: S_ADD_U32 # GCN-NEXT: V_CMP_EQ_F32 --- -name: hazard_smem_war_related_clause +name: hazard_smem_war_dependent_salu body: | bb.0: liveins: $sgpr0, $sgpr1, $sgpr4, $vgpr0, $vgpr1 @@ -46,19 +46,128 @@ body: | S_ENDPGM 0 ...
-# GCN-LABEL: name: hazard_smem_war_related_clause_vmcnt +# GCN-LABEL: name: hazard_smem_war_independent_salu # GCN: S_LOAD_DWORD_IMM -# GCN-NEXT: S_WAITCNT 3952{{$}} +# GCN-NEXT: S_WAITCNT # GCN-NEXT: S_ADD_U32 # GCN-NEXT: V_CMP_EQ_F32 --- -name: hazard_smem_war_related_clause_vmcnt +name: hazard_smem_war_independent_salu body: | bb.0: - liveins: $sgpr0, $sgpr1, $sgpr4, $vgpr0, $vgpr1 + liveins: $sgpr0, $sgpr1, $sgpr4, $sgpr5, $vgpr0, $vgpr1 + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 + S_WAITCNT 0 + $sgpr3 = S_ADD_U32 $sgpr5, $sgpr4, implicit-def $scc + $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + S_ENDPGM 0 +... + +# GCN-LABEL: name: hazard_smem_war_only_smem +# GCN: S_LOAD_DWORD_IMM +# GCN-NEXT: S_LOAD_DWORD_IMM +# GCN-NEXT: $sgpr_null = S_MOV_B32 0 +# GCN-NEXT: V_CMP_EQ_F32 +--- +name: hazard_smem_war_only_smem +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr6, $sgpr7, $vgpr0, $vgpr1 + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 + $sgpr5 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0, 0 + $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + S_ENDPGM 0 +... + +# GCN-LABEL: name: hazard_smem_war_only_waitcnt_0 +# GCN: S_LOAD_DWORD_IMM +# GCN-NEXT: S_WAITCNT +# GCN-NEXT: V_CMP_EQ_F32 +--- +name: hazard_smem_war_only_waitcnt_0 +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 + S_WAITCNT 0 + $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + S_ENDPGM 0 +... + +# GCN-LABEL: name: hazard_smem_war_only_vmcnt_0 +# GCN: S_LOAD_DWORD_IMM +# GCN-NEXT: S_WAITCNT 3952{{$}} +# GCN-NEXT: $sgpr_null = S_MOV_B32 0 +# GCN-NEXT: V_CMP_EQ_F32 +--- +name: hazard_smem_war_only_vmcnt_0 +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 S_WAITCNT 3952 - $sgpr3 = S_ADD_U32 $sgpr2, $sgpr4, implicit-def $scc + $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + S_ENDPGM 0 +... 
+ +# GCN-LABEL: name: hazard_smem_war_only_expcnt_0 +# GCN: S_LOAD_DWORD_IMM +# GCN-NEXT: S_WAITCNT 53007{{$}} +# GCN-NEXT: $sgpr_null = S_MOV_B32 0 +# GCN-NEXT: V_CMP_EQ_F32 +--- +name: hazard_smem_war_only_expcnt_0 +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 + S_WAITCNT 53007 + $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + S_ENDPGM 0 +... + +# GCN-LABEL: name: hazard_smem_war_only_lgkmcnt_0 +# GCN: S_LOAD_DWORD_IMM +# GCN-NEXT: S_WAITCNT 49279{{$}} +# GCN-NEXT: V_CMP_EQ_F32 +--- +name: hazard_smem_war_only_lgkmcnt_0 +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 + S_WAITCNT 49279 + $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + S_ENDPGM 0 +... + +# GCN-LABEL: name: hazard_smem_war_only_waitcnt_lgkmcnt_0 +# GCN: S_LOAD_DWORD_IMM +# GCN-NEXT: S_WAITCNT_LGKMCNT +# GCN-NEXT: V_CMP_EQ_F32 +--- +name: hazard_smem_war_only_waitcnt_lgkmcnt_0 +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 + S_WAITCNT_LGKMCNT $sgpr_null, 0 + $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + S_ENDPGM 0 +... + +# GCN-LABEL: name: hazard_smem_war_only_waitcnt_lgkmcnt_1 +# GCN: S_LOAD_DWORD_IMM +# GCN-NEXT: S_WAITCNT_LGKMCNT +# GCN-NEXT: $sgpr_null = S_MOV_B32 0 +# GCN-NEXT: V_CMP_EQ_F32 +--- +name: hazard_smem_war_only_waitcnt_lgkmcnt_1 +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 + S_WAITCNT_LGKMCNT $sgpr_null, 1 $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec S_ENDPGM 0 ... -- 2.50.1