From b7cef668c0bd4ee48bb77ec726fbcb4fdf9e55a3 Mon Sep 17 00:00:00 2001 From: Austin Kerbow Date: Mon, 15 Jul 2019 22:07:05 +0000 Subject: [PATCH] [AMDGPU] Enable merging m0 initializations. Summary: Enable hoisting and merging m0 defs that are initialized with the same immediate value. Fixes bug where removed instructions are not considered to interfere with other inits, and make sure to not hoist inits before block prologues. Reviewers: rampitec, arsenm Reviewed By: rampitec Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64766 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366135 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 47 +++++++---- test/CodeGen/AMDGPU/merge-m0.mir | 108 ++++++++++++++++++++++++-- 2 files changed, 133 insertions(+), 22 deletions(-) diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 18598d6cef4..624953963cf 100644 --- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -103,7 +103,7 @@ using namespace llvm; static cl::opt EnableM0Merge( "amdgpu-enable-merge-m0", cl::desc("Merge and hoist M0 initializations"), - cl::init(false)); + cl::init(true)); namespace { @@ -452,18 +452,32 @@ static bool isReachable(const MachineInstr *From, (const MachineBasicBlock *MBB) { return MBB == MBBFrom; }); } +// Return the first non-prologue instruction in the block. +static MachineBasicBlock::iterator +getFirstNonPrologue(MachineBasicBlock *MBB, const TargetInstrInfo *TII) { + MachineBasicBlock::iterator I = MBB->getFirstNonPHI(); + while (I != MBB->end() && TII->isBasicBlockPrologue(*I)) + ++I; + + return I; +} + // Hoist and merge identical SGPR initializations into a common predecessor. // This is intended to combine M0 initializations, but can work with any // SGPR. A VGPR cannot be processed since we cannot guarantee vector // executioon. static bool hoistAndMergeSGPRInits(unsigned Reg, const MachineRegisterInfo &MRI, - MachineDominatorTree &MDT) { + MachineDominatorTree &MDT, + const TargetInstrInfo *TII) { // List of inits by immediate value. using InitListMap = std::map>; InitListMap Inits; // List of clobbering instructions. SmallVector Clobbers; + // List of instructions marked for deletion. + SmallSet MergedInstrs; + bool Changed = false; for (auto &MI : MRI.def_instructions(Reg)) { @@ -492,8 +506,8 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, MachineInstr *MI2 = *I2; // Check any possible interference - auto intereferes = [&](MachineBasicBlock::iterator From, - MachineBasicBlock::iterator To) -> bool { + auto interferes = [&](MachineBasicBlock::iterator From, + MachineBasicBlock::iterator To) -> bool { assert(MDT.dominates(&*To, &*From)); @@ -525,23 +539,23 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, }; if (MDT.dominates(MI1, MI2)) { - if (!intereferes(MI2, MI1)) { + if (!interferes(MI2, MI1)) { LLVM_DEBUG(dbgs() << "Erasing from " << printMBBReference(*MI2->getParent()) << " " << *MI2); - MI2->eraseFromParent(); - Defs.erase(I2++); + MergedInstrs.insert(MI2); Changed = true; + ++I2; continue; } } else if (MDT.dominates(MI2, MI1)) { - if (!intereferes(MI1, MI2)) { + if (!interferes(MI1, MI2)) { LLVM_DEBUG(dbgs() << "Erasing from " << printMBBReference(*MI1->getParent()) << " " << *MI1); - MI1->eraseFromParent(); - Defs.erase(I1++); + MergedInstrs.insert(MI1); Changed = true; + ++I1; break; } } else { @@ -552,8 +566,8 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, continue; } - MachineBasicBlock::iterator I = MBB->getFirstNonPHI(); - if (!intereferes(MI1, I) && !intereferes(MI2, I)) { + MachineBasicBlock::iterator I = getFirstNonPrologue(MBB, TII); + if (!interferes(MI1, I) && !interferes(MI2, I)) { LLVM_DEBUG(dbgs() << "Erasing from " << printMBBReference(*MI1->getParent()) << " " << *MI1 @@ -561,9 +575,9 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, << printMBBReference(*MI2->getParent()) << " to " << printMBBReference(*I->getParent()) << " " << *MI2); I->getParent()->splice(I, MI2->getParent(), MI2); - MI1->eraseFromParent(); - Defs.erase(I1++); + MergedInstrs.insert(MI1); Changed = true; + ++I1; break; } } @@ -573,6 +587,9 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, } } + for (auto MI : MergedInstrs) + MI->removeFromParent(); + if (Changed) MRI.clearKillFlags(Reg); @@ -723,7 +740,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { } if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge) - hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT); + hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT, TII); return true; } diff --git a/test/CodeGen/AMDGPU/merge-m0.mir b/test/CodeGen/AMDGPU/merge-m0.mir index 73a6b136850..bafbce05a88 100644 --- a/test/CodeGen/AMDGPU/merge-m0.mir +++ b/test/CodeGen/AMDGPU/merge-m0.mir @@ -47,13 +47,7 @@ # GCN-NEXT: DS_WRITE_B32 --- -name: test -alignment: 0 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true +name: merge-m0-many-init registers: - { id: 0, class: vgpr_32 } - { id: 1, class: vgpr_32 } @@ -129,3 +123,103 @@ body: | S_BRANCH %bb.0.entry ... + +# GCN: bb.0.entry: +# GCN: SI_INIT_M0 65536 +# GCN-NEXT: DS_WRITE_B32 + +#GCN: bb.1: +#GCN-NOT: SI_INIT_M0 65536 +#GCN-NOT: SI_INIT_M0 -1 + +#GCN: bb.2: +#GCN: SI_INIT_M0 -1 + +#GCN: bb.3: +#GCN: SI_INIT_M0 -1 + +--- +name: merge-m0-dont-hoist-past-init-with-different-initializer +registers: + - { id: 0, class: vgpr_32 } + - { id: 1, class: vgpr_32 } +body: | + bb.0.entry: + successors: %bb.1 + + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + SI_INIT_M0 65536, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + S_BRANCH %bb.1 + + bb.1: + successors: %bb.2, %bb.3 + + SI_INIT_M0 65536, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + S_CBRANCH_VCCZ %bb.2, implicit undef $vcc + S_BRANCH %bb.3 + + bb.2: + successors: %bb.4 + + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + successors: %bb.4 + + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + S_BRANCH %bb.4 + + bb.4: + S_ENDPGM 0 +... + +# GCN: bb.0.entry: +# GCN-NOT: SI_INIT_M0 +# GCN: S_OR_B64 +# GCN-NEXT: SI_INIT_M0 + +#GCN: bb.1: +#GCN-NOT: SI_INIT_M0 -1 + +#GCN: bb.2: +#GCN-NOT: SI_INIT_MO -1 + +--- +name: merge-m0-after-prologue +registers: + - { id: 0, class: vgpr_32 } + - { id: 1, class: vgpr_32 } +body: | + bb.0.entry: + successors: %bb.1, %bb.2 + liveins: $sgpr0_sgpr1 + + $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + S_CBRANCH_VCCZ %bb.1, implicit undef $vcc + S_BRANCH %bb.2 + + bb.1: + successors: %bb.3 + + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + S_BRANCH %bb.3 + + bb.2: + successors: %bb.3 + + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + S_BRANCH %bb.3 + + bb.3: + S_ENDPGM 0 +... -- 2.40.0