From 36ba7962a4627f9e557c5db39f9ae845c8f5fcc8 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 24 Sep 2014 01:33:24 +0000 Subject: [PATCH] R600/SI: Fix the FixSGPRLiveRanges pass The previous implementation was extending the live range of SGPRs by modifying the live intervals directly. This was causing a lot of machine verification errors when the machine scheduler was enabled. The new implementation adds pseudo instructions with implicit uses to extend the live ranges of SGPRs, which works much better. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218351 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUTargetMachine.cpp | 3 +- lib/Target/R600/SIFixSGPRLiveRanges.cpp | 143 +++++++++++++++++++----- lib/Target/R600/SIInstrInfo.cpp | 4 + lib/Target/R600/SIInstructions.td | 4 + 4 files changed, 121 insertions(+), 33 deletions(-) diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 1203cce54ec..c95a9410ff6 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -149,8 +149,7 @@ bool AMDGPUPassConfig::addPreRegAlloc() { // so we need to run MachineCSE afterwards. addPass(&MachineCSEID); addPass(createSIShrinkInstructionsPass()); - initializeSIFixSGPRLiveRangesPass(*PassRegistry::getPassRegistry()); - insertPass(&RegisterCoalescerID, &SIFixSGPRLiveRangesID); + addPass(createSIFixSGPRLiveRangesPass()); } return false; } diff --git a/lib/Target/R600/SIFixSGPRLiveRanges.cpp b/lib/Target/R600/SIFixSGPRLiveRanges.cpp index 36e0e481ab5..f34c3758043 100644 --- a/lib/Target/R600/SIFixSGPRLiveRanges.cpp +++ b/lib/Target/R600/SIFixSGPRLiveRanges.cpp @@ -9,18 +9,49 @@ // /// \file /// SALU instructions ignore control flow, so we need to modify the live ranges -/// of the registers they define. +/// of the registers they define in some cases. /// -/// The strategy is to view the entire program as if it were a single basic -/// block and calculate the intervals accordingly. We implement this -/// by walking this list of segments for each LiveRange and setting the -/// end of each segment equal to the start of the segment that immediately -/// follows it. +/// The main case we need to handle is when a def is used in one side of a +/// branch and not another. For example: +/// +/// %def +/// IF +/// ... +/// ... +/// ELSE +/// %use +/// ... +/// ENDIF +/// +/// Here we need the register allocator to avoid assigning any of the defs +/// inside of the IF to the same register as %def. In traditional live +/// interval analysis %def is not live inside the IF branch, however, since +/// SALU instructions inside of IF will be executed even if the branch is not +/// taken, there is the chance that one of the instructions will overwrite the +/// value of %def, so the use in ELSE will see the wrong value. +/// +/// The strategy we use for solving this is to add an extra use after the ENDIF: +/// +/// %def +/// IF +/// ... +/// ... +/// ELSE +/// %use +/// ... +/// ENDIF +/// %use +/// +/// Adding this use will make the def live thoughout the IF branch, which is +/// what we want. #include "AMDGPU.h" +#include "SIInstrInfo.h" #include "SIRegisterInfo.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetMachine.h" @@ -48,8 +79,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); - AU.addPreserved(); - AU.addPreserved(); + AU.addRequired(); AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -60,6 +90,7 @@ public: INITIALIZE_PASS_BEGIN(SIFixSGPRLiveRanges, DEBUG_TYPE, "SI Fix SGPR Live Ranges", false, false) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) INITIALIZE_PASS_END(SIFixSGPRLiveRanges, DEBUG_TYPE, "SI Fix SGPR Live Ranges", false, false) @@ -73,36 +104,86 @@ FunctionPass *llvm::createSIFixSGPRLiveRangesPass() { bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); - const SIRegisterInfo *TRI = - static_cast(MF.getSubtarget().getRegisterInfo()); + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + const SIRegisterInfo *TRI = static_cast( + MF.getSubtarget().getRegisterInfo()); LiveIntervals *LIS = &getAnalysis(); + MachinePostDominatorTree *PDT = &getAnalysis(); + std::vector> SGPRLiveRanges; + + // First pass, collect all live intervals for SGPRs + for (const MachineBasicBlock &MBB : MF) { + for (const MachineInstr &MI : MBB) { + for (const MachineOperand &MO : MI.defs()) { + if (MO.isImplicit()) + continue; + unsigned Def = MO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(Def)) { + if (TRI->isSGPRClass(MRI.getRegClass(Def))) + SGPRLiveRanges.push_back( + std::make_pair(Def, &LIS->getInterval(Def))); + } else if (TRI->isSGPRClass(TRI->getPhysRegClass(Def))) { + SGPRLiveRanges.push_back( + std::make_pair(Def, &LIS->getRegUnit(Def))); + } + } + } + } + // Second pass fix the intervals for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { - MachineBasicBlock &MBB = *BI; - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); - I != E; ++I) { - MachineInstr &MI = *I; - MachineOperand *ExecUse = MI.findRegisterUseOperand(AMDGPU::EXEC); - if (ExecUse) + if (MBB.succ_size() < 2) + continue; + + // We have structured control flow, so number of succesors should be two. + assert(MBB.succ_size() == 2); + MachineBasicBlock *SuccA = *MBB.succ_begin(); + MachineBasicBlock *SuccB = *(++MBB.succ_begin()); + MachineBasicBlock *NCD = PDT->findNearestCommonDominator(SuccA, SuccB); + + if (!NCD) + continue; + + MachineBasicBlock::iterator NCDTerm = NCD->getFirstTerminator(); + + if (NCDTerm != NCD->end() && NCDTerm->getOpcode() == AMDGPU::SI_ELSE) { + assert(NCD->succ_size() == 2); + // We want to make sure we insert the Use after the ENDIF, not after + // the ELSE. + NCD = PDT->findNearestCommonDominator(*NCD->succ_begin(), + *(++NCD->succ_begin())); + } + assert(SuccA && SuccB); + for (std::pair RegLR : SGPRLiveRanges) { + unsigned Reg = RegLR.first; + LiveRange *LR = RegLR.second; + + // FIXME: We could be smarter here. If the register is Live-In to + // one block, but the other doesn't have any SGPR defs, then there + // won't be a conflict. Also, if the branch decision is based on + // a value in an SGPR, then there will be no conflict. + bool LiveInToA = LIS->isLiveInToMBB(*LR, SuccA); + bool LiveInToB = LIS->isLiveInToMBB(*LR, SuccB); + + if ((!LiveInToA && !LiveInToB) || + (LiveInToA && LiveInToB)) continue; - for (const MachineOperand &Def : MI.operands()) { - if (!Def.isReg() || !Def.isDef() ||!TargetRegisterInfo::isVirtualRegister(Def.getReg())) - continue; - - const TargetRegisterClass *RC = MRI.getRegClass(Def.getReg()); - - if (!TRI->isSGPRClass(RC)) - continue; - LiveInterval &LI = LIS->getInterval(Def.getReg()); - for (unsigned i = 0, e = LI.size() - 1; i != e; ++i) { - LiveRange::Segment &Seg = LI.segments[i]; - LiveRange::Segment &Next = LI.segments[i + 1]; - Seg.end = Next.start; - } - } + // This interval is live in to one successor, but not the other, so + // we need to update its range so it is live in to both. + DEBUG(dbgs() << "Possible SGPR conflict detected " << " in " << *LR << + " BB#" << SuccA->getNumber() << ", BB#" << + SuccB->getNumber() << + " with NCD = " << NCD->getNumber() << '\n'); + + // FIXME: Need to figure out how to update LiveRange here so this pass + // will be able to preserve LiveInterval analysis. + BuildMI(*NCD, NCD->getFirstNonPHI(), DebugLoc(), + TII->get(AMDGPU::SGPR_USE)) + .addReg(Reg, RegState::Implicit); + DEBUG(NCD->getFirstNonPHI()->dump()); } } diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index 3b6a3626768..e53db58123b 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -674,6 +674,10 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { MI->eraseFromParent(); break; } + case AMDGPU::SGPR_USE: + // This is just a placeholder for register allocation. + MI->eraseFromParent(); + break; } return true; } diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 8d23fa46d3d..d741c84a650 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1652,6 +1652,10 @@ def V_XOR_I1 : InstSI < [(set i1:$dst, (xor i1:$src0, i1:$src1))] >; +let hasSideEffects = 1 in { +def SGPR_USE : InstSI <(outs),(ins), "", []>; +} + // SI pseudo instructions. These are used by the CFG structurizer pass // and should be lowered to ISA instructions prior to codegen. -- 2.40.0