[AMDGPU] Prevent VGPR copies from moving across the EXEC mask definitions

author Alexander Timofeev <Alexander.Timofeev@amd.com>

Wed, 21 Aug 2019 15:15:04 +0000 (15:15 +0000)

committer Alexander Timofeev <Alexander.Timofeev@amd.com>

Wed, 21 Aug 2019 15:15:04 +0000 (15:15 +0000)
author Alexander Timofeev <Alexander.Timofeev@amd.com>
Wed, 21 Aug 2019 15:15:04 +0000 (15:15 +0000)
committer Alexander Timofeev <Alexander.Timofeev@amd.com>
Wed, 21 Aug 2019 15:15:04 +0000 (15:15 +0000)
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h

index 78c21ab005b2b75c2955e3690a1691ba0cc0389d..4856a45fd3bd366c3bc793cc2c40e03d91745ac4 100644 (file)
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -427,6 +427,22 @@ public:
      return getNumExplicitDefs() + MCID->getNumImplicitDefs();
    }
  
+  /// Returns true if any operand past the explicit ones is an implicit def.
+  bool hasImplicitDef() const {
+    for (unsigned I = getNumExplicitOperands(), E = getNumOperands();
+      I != E; ++I) { // Scan only the operands after the explicit ones.
+      const MachineOperand &MO = getOperand(I);
+      if (MO.isDef() && MO.isImplicit())
+        return true;
+    }
+    return false;
+  }
+
+  /// Returns the number of implicit operands (total minus explicit operands).
+  unsigned getNumImplicitOperands() const {
+    return getNumOperands() - getNumExplicitOperands();
+  }
+
    /// Return true if operand \p OpIdx is a subregister index.
    bool isOperandSubregIdx(unsigned OpIdx) const {
      assert(getOperand(OpIdx).getType() == MachineOperand::MO_Immediate &&
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp

index 6818195d8bb067aa9507945c9ddc9af2bffa30d8..02591a8917e6b98e96a00aaf6ad7f341f87f75c5 100644 (file)
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -1808,7 +1808,11 @@ ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
    assert(Def->isCopy() && "Invalid definition");
    // Copy instruction are supposed to be: Def = Src.
    // If someone breaks this assumption, bad things will happen everywhere.
-  assert(Def->getNumOperands() == 2 && "Invalid number of operands");
+  // There may be implicit uses preventing the copy from being moved across
+  // some target-specific register definitions.
+  assert(Def->getNumOperands() - Def->getNumImplicitOperands() == 2 &&
+         "Invalid number of operands");
+  assert(!Def->hasImplicitDef() && "Only implicit uses are allowed");
  
    if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
      // If we look for a different subreg, it means we want a subreg of src.
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp

index e33cf1d90081782de75eda70e2418448eb72cad7..caef36c70e71b8e64edc67b3d4ac750ad306717b 100644 (file)
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -612,6 +612,13 @@ void SIFoldOperands::foldOperand(
        return;
  
      UseMI->setDesc(TII->get(MovOp));
+    MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
+    MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
+    while (ImpOpI != ImpOpE) {
+      MachineInstr::mop_iterator Tmp = ImpOpI;
+      ImpOpI++;
+      UseMI->RemoveOperand(UseMI->getOperandNo(Tmp));
+    }
      CopiesToReplace.push_back(UseMI);
    } else {
      if (UseMI->isCopy() && OpToFold.isReg() &&
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp

index ea877272826b75d6f0ab6005f0d088e7dd086e08..a623fedde1e5f621b57ecd196e0b95913738cbba 100644 (file)
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4200,6 +4200,15 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
    // Try to eliminate the copy if it is copying an immediate value.
    if (Def->isMoveImmediate())
      FoldImmediate(*Copy, *Def, OpReg, &MRI);
+
+  bool ImpDef = Def->isImplicitDef();
+  while (!ImpDef && Def && Def->isCopy()) {
+    Def = MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
+    ImpDef = Def && Def->isImplicitDef();
+  }
+  if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
+      !ImpDef)
+    Copy->addOperand(MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
  }
  
  // Emit the actual waterfall loop, executing the wrapped instruction for each
diff --git a/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir b/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir

index 3ac4f209b61f868afb0c15ecfdfef0c996355857..49b8b1af7d5e7b0e62db9d7aaf3ae9bc0b42c14b 100644 (file)
--- a/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
+++ b/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
@@ -882,7 +882,7 @@ body:             |
  ---
  
  # GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_immreg{{$}}
-# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
  # GCN-NEXT: S_ENDPGM 0, implicit %3
  
  name: constant_fold_lshl_or_reg0_immreg_immreg
author	Alexander Timofeev <Alexander.Timofeev@amd.com>
	Wed, 21 Aug 2019 15:15:04 +0000 (15:15 +0000)
committer	Alexander Timofeev <Alexander.Timofeev@amd.com>
	Wed, 21 Aug 2019 15:15:04 +0000 (15:15 +0000)
include/llvm/CodeGen/MachineInstr.h		patch \| blob \| history
lib/CodeGen/PeepholeOptimizer.cpp		patch \| blob \| history
lib/Target/AMDGPU/SIFoldOperands.cpp		patch \| blob \| history
lib/Target/AMDGPU/SIInstrInfo.cpp		patch \| blob \| history
test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir		patch \| blob \| history