From 7822759c5ffba89386fe03ec258f78a09d263efc Mon Sep 17 00:00:00 2001
From: Alexander Timofeev
Date: Wed, 21 Aug 2019 15:15:04 +0000
Subject: [PATCH] [AMDGPU] Prevent VGPR copies from moving across the EXEC
 mask definitions

Differential Revision: https://reviews.llvm.org/D63731
Reviewers: qcolombet, rampitec

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@369532 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/CodeGen/MachineInstr.h              | 16 ++++++++++++++++
 lib/CodeGen/PeepholeOptimizer.cpp                |  6 +++++-
 lib/Target/AMDGPU/SIFoldOperands.cpp             |  7 +++++++
 lib/Target/AMDGPU/SIInstrInfo.cpp                |  9 +++++++++
 test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir |  2 +-
 5 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index 78c21ab005b..4856a45fd3b 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -427,6 +427,22 @@ public:
     return getNumExplicitDefs() + MCID->getNumImplicitDefs();
   }
 
+  /// Returns true if the instruction has an implicit definition.
+  bool hasImplicitDef() const {
+    for (unsigned I = getNumExplicitOperands(), E = getNumOperands();
+         I != E; ++I) {
+      const MachineOperand &MO = getOperand(I);
+      if (MO.isDef() && MO.isImplicit())
+        return true;
+    }
+    return false;
+  }
+
+  /// Returns the number of implicit operands.
+  unsigned getNumImplicitOperands() const {
+    return getNumOperands() - getNumExplicitOperands();
+  }
+
   /// Return true if operand \p OpIdx is a subregister index.
   bool isOperandSubregIdx(unsigned OpIdx) const {
     assert(getOperand(OpIdx).getType() == MachineOperand::MO_Immediate &&
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 6818195d8bb..02591a8917e 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -1808,7 +1808,11 @@ ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
   assert(Def->isCopy() && "Invalid definition");
   // Copy instruction are supposed to be: Def = Src.
   // If someone breaks this assumption, bad things will happen everywhere.
-  assert(Def->getNumOperands() == 2 && "Invalid number of operands");
+  // There may be implicit uses preventing the copy from being moved across
+  // some target-specific register definitions.
+  assert(Def->getNumOperands() - Def->getNumImplicitOperands() == 2 &&
+         "Invalid number of operands");
+  assert(!Def->hasImplicitDef() && "Only implicit uses are allowed");
 
   if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
     // If we look for a different subreg, it means we want a subreg of src.
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp
index e33cf1d9008..caef36c70e7 100644
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -612,6 +612,13 @@ void SIFoldOperands::foldOperand(
       return;
 
     UseMI->setDesc(TII->get(MovOp));
+    MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
+    MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
+    while (ImpOpI != ImpOpE) {
+      MachineInstr::mop_iterator Tmp = ImpOpI;
+      ImpOpI++;
+      UseMI->RemoveOperand(UseMI->getOperandNo(Tmp));
+    }
     CopiesToReplace.push_back(UseMI);
   } else {
     if (UseMI->isCopy() && OpToFold.isReg() &&
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index ea877272826..a623fedde1e 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4200,6 +4200,15 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
   // Try to eliminate the copy if it is copying an immediate value.
   if (Def->isMoveImmediate())
     FoldImmediate(*Copy, *Def, OpReg, &MRI);
+
+  bool ImpDef = Def->isImplicitDef();
+  while (!ImpDef && Def && Def->isCopy()) {
+    Def = MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
+    ImpDef = Def && Def->isImplicitDef();
+  }
+  if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
+      !ImpDef)
+    Copy->addOperand(MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
 }
 
 // Emit the actual waterfall loop, executing the wrapped instruction for each
diff --git a/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir b/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
index 3ac4f209b61..49b8b1af7d5 100644
--- a/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
+++ b/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
@@ -882,7 +882,7 @@ body:             |
 ---
 
 # GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_immreg{{$}}
-# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
 # GCN-NEXT: S_ENDPGM 0, implicit %3
 
 name: constant_fold_lshl_or_reg0_immreg_immreg
-- 
2.40.0
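
A minimal sketch (not part of the patch, assuming the LLVM CodeGen API of this
era) of the copy invariant that the PeepholeOptimizer hunk relaxes: a COPY must
still be "Def = Src" at the explicit-operand level, but it may now carry extra
implicit uses, never implicit defs. The predicate name isWellFormedCopy is
hypothetical and exists only for illustration:

#include "llvm/CodeGen/MachineInstr.h"

using namespace llvm;

// Anything past the two explicit operands of a COPY must be an implicit
// use (e.g. the EXEC read added by this patch), never an implicit def.
static bool isWellFormedCopy(const MachineInstr &MI) {
  return MI.isCopy() &&
         MI.getNumOperands() - MI.getNumImplicitOperands() == 2 &&
         !MI.hasImplicitDef();
}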
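
Likewise, a hedged sketch of what the SIInstrInfo::legalizeGenericOperand()
hunk does: it pins a VGPR copy beneath the live EXEC definition by adding an
implicit EXEC use, unless the copy already reads EXEC or its source chain ends
in an IMPLICIT_DEF. The helper name markVGPRCopyExecRead and its free-standing
signature are hypothetical; in the patch itself this logic runs only when
!RI.isSGPRClass(DstRC), since SGPR writes do not depend on the execution mask.

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

using namespace llvm;

// Hypothetical helper mirroring the legalizeGenericOperand() change above.
static void markVGPRCopyExecRead(MachineInstr &Copy, MachineRegisterInfo &MRI,
                                 const TargetRegisterInfo &TRI,
                                 unsigned ExecReg /* AMDGPU::EXEC */) {
  // Walk through chained copies: a value that originates from IMPLICIT_DEF
  // carries no real data, so pinning it to EXEC would only hurt scheduling.
  MachineInstr *Def = MRI.getUniqueVRegDef(Copy.getOperand(1).getReg());
  bool ImpDef = Def && Def->isImplicitDef();
  while (!ImpDef && Def && Def->isCopy()) {
    Def = MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
    ImpDef = Def && Def->isImplicitDef();
  }
  // The implicit use makes every pass that respects register dependencies
  // treat the copy as ordered against EXEC writes.
  if (!ImpDef && !Copy.readsRegister(ExecReg, &TRI))
    Copy.addOperand(MachineOperand::CreateReg(ExecReg, /*isDef=*/false,
                                              /*isImp=*/true));
}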