[AMDGPU] Do not insert an instruction into worklist twice in movetovalu

author Alfred Huang <alfred.j.huang@gmail.com>

Fri, 14 Jul 2017 17:56:55 +0000 (17:56 +0000)

committer Alfred Huang <alfred.j.huang@gmail.com>

Fri, 14 Jul 2017 17:56:55 +0000 (17:56 +0000)
author Alfred Huang <alfred.j.huang@gmail.com>
Fri, 14 Jul 2017 17:56:55 +0000 (17:56 +0000)
committer Alfred Huang <alfred.j.huang@gmail.com>
Fri, 14 Jul 2017 17:56:55 +0000 (17:56 +0000)
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp

index 160f8837d49c85627b305811b3508ad0509e6112..a7e0feb10b9f15c5cfcb4816c040c33f8d71f48e 100644 (file)
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3408,8 +3408,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
  }
  
  void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
-  SmallVector<MachineInstr *, 128> Worklist;
-  Worklist.push_back(&TopInst);
+  SetVectorType Worklist;
+  Worklist.insert(&TopInst);
  
    while (!Worklist.empty()) {
      MachineInstr &Inst = *Worklist.pop_back_val();
@@ -3610,7 +3610,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
    }
  }
  
-void SIInstrInfo::lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist,
+void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist,
                                   MachineInstr &Inst) const {
    MachineBasicBlock &MBB = *Inst.getParent();
    MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -3635,7 +3635,7 @@ void SIInstrInfo::lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist,
  }
  
  void SIInstrInfo::splitScalar64BitUnaryOp(
-    SmallVectorImpl<MachineInstr *> &Worklist, MachineInstr &Inst,
+    SetVectorType &Worklist, MachineInstr &Inst,
      unsigned Opcode) const {
    MachineBasicBlock &MBB = *Inst.getParent();
    MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -3686,7 +3686,7 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
  }
  
  void SIInstrInfo::splitScalar64BitBinaryOp(
-    SmallVectorImpl<MachineInstr *> &Worklist, MachineInstr &Inst,
+    SetVectorType &Worklist, MachineInstr &Inst,
      unsigned Opcode) const {
    MachineBasicBlock &MBB = *Inst.getParent();
    MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -3753,7 +3753,7 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
  }
  
  void SIInstrInfo::splitScalar64BitBCNT(
-    SmallVectorImpl<MachineInstr *> &Worklist, MachineInstr &Inst) const {
+    SetVectorType &Worklist, MachineInstr &Inst) const {
    MachineBasicBlock &MBB = *Inst.getParent();
    MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  
@@ -3789,7 +3789,7 @@ void SIInstrInfo::splitScalar64BitBCNT(
    addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
  }
  
-void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
+void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist,
                                        MachineInstr &Inst) const {
    MachineBasicBlock &MBB = *Inst.getParent();
    MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -3853,12 +3853,12 @@ void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
  void SIInstrInfo::addUsersToMoveToVALUWorklist(
    unsigned DstReg,
    MachineRegisterInfo &MRI,
-  SmallVectorImpl<MachineInstr *> &Worklist) const {
+  SetVectorType &Worklist) const {
    for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
           E = MRI.use_end(); I != E;) {
      MachineInstr &UseMI = *I->getParent();
      if (!canReadVGPR(UseMI, I.getOperandNo())) {
-      Worklist.push_back(&UseMI);
+      Worklist.insert(&UseMI);
  
        do {
          ++I;
@@ -3869,7 +3869,7 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist(
    }
  }
  
-void SIInstrInfo::movePackToVALU(SmallVectorImpl<MachineInstr *> &Worklist,
+void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
                                   MachineRegisterInfo &MRI,
                                   MachineInstr &Inst) const {
    unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -3932,7 +3932,7 @@ void SIInstrInfo::movePackToVALU(SmallVectorImpl<MachineInstr *> &Worklist,
  }
  
  void SIInstrInfo::addSCCDefUsersToVALUWorklist(
-    MachineInstr &SCCDefInst, SmallVectorImpl<MachineInstr *> &Worklist) const {
+    MachineInstr &SCCDefInst, SetVectorType &Worklist) const {
    // This assumes that all the users of SCC are in the same block
    // as the SCC def.
    for (MachineInstr &MI :
@@ -3943,7 +3943,7 @@ void SIInstrInfo::addSCCDefUsersToVALUWorklist(
        return;
  
      if (MI.findRegisterUseOperandIdx(AMDGPU::SCC) != -1)
-      Worklist.push_back(&MI);
+      Worklist.insert(&MI);
    }
  }
  
diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h

index d00c0d4a7f4eac0bb9f6fb18c973709c3928040e..3dd5bc89e6c77baa87ad89af34f93fada7553589 100644 (file)
--- a/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/lib/Target/AMDGPU/SIInstrInfo.h
@@ -19,6 +19,7 @@
  #include "AMDGPUInstrInfo.h"
  #include "SIDefines.h"
  #include "SIRegisterInfo.h"
+#include "llvm/ADT/SetVector.h"
  
  namespace llvm {
  
@@ -38,6 +39,8 @@ private:
      EXECZ = 3
    };
  
+  typedef SmallSetVector<MachineInstr *, 32> SetVectorType;
+
    static unsigned getBranchOpcode(BranchPredicate Cond);
    static BranchPredicate getBranchPredicate(unsigned Opcode);
  
@@ -56,30 +59,30 @@ private:
  
    void swapOperands(MachineInstr &Inst) const;
  
-  void lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist,
+  void lowerScalarAbs(SetVectorType &Worklist,
                        MachineInstr &Inst) const;
  
-  void splitScalar64BitUnaryOp(SmallVectorImpl<MachineInstr *> &Worklist,
+  void splitScalar64BitUnaryOp(SetVectorType &Worklist,
                                 MachineInstr &Inst, unsigned Opcode) const;
  
-  void splitScalar64BitBinaryOp(SmallVectorImpl<MachineInstr *> &Worklist,
+  void splitScalar64BitBinaryOp(SetVectorType &Worklist,
                                  MachineInstr &Inst, unsigned Opcode) const;
  
-  void splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
+  void splitScalar64BitBCNT(SetVectorType &Worklist,
                              MachineInstr &Inst) const;
-  void splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
+  void splitScalar64BitBFE(SetVectorType &Worklist,
                             MachineInstr &Inst) const;
-  void movePackToVALU(SmallVectorImpl<MachineInstr *> &Worklist,
+  void movePackToVALU(SetVectorType &Worklist,
                        MachineRegisterInfo &MRI,
                        MachineInstr &Inst) const;
  
    void addUsersToMoveToVALUWorklist(
      unsigned Reg, MachineRegisterInfo &MRI,
-    SmallVectorImpl<MachineInstr *> &Worklist) const;
+    SetVectorType &Worklist) const;
  
    void
    addSCCDefUsersToVALUWorklist(MachineInstr &SCCDefInst,
-                               SmallVectorImpl<MachineInstr *> &Worklist) const;
+                               SetVectorType &Worklist) const;
  
    const TargetRegisterClass *
    getDestEquivalentVGPRClass(const MachineInstr &Inst) const;
diff --git a/test/CodeGen/AMDGPU/move-to-valu-worklist.ll b/test/CodeGen/AMDGPU/move-to-valu-worklist.ll

new file mode 100644 (file)

index 0000000..539eed9
--- /dev/null
+++ b/test/CodeGen/AMDGPU/move-to-valu-worklist.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+
+; When moveToVALU() moves an instruction to the vector ALU, all instructions
+; in its use chain are visited. The same instruction must not be pushed onto
+; the visit worklist more than once.
+               
+; GCN-LABEL: {{^}}in_worklist_once:
+; GCN: buffer_load_dword
+; GCN: BB0_1:
+; GCN: v_xor_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-NEXT: v_xor_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-NEXT: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define amdgpu_kernel void @in_worklist_once() #0 {
+bb:
+       %tmp = load i64, i64* undef
+br label %bb1
+
+bb1:                                              ; preds = %bb1, %bb
+       %tmp2 = phi i64 [ undef, %bb ], [ %tmp16, %bb1 ]
+       %tmp3 = phi i64 [ %tmp, %bb ], [ undef, %bb1 ]
+       %tmp11 = shl i64 %tmp2, 14
+       %tmp13 = xor i64 %tmp11, %tmp2
+       %tmp15 = and i64 %tmp3, %tmp13
+       %tmp16 = xor i64 %tmp15, %tmp3
+br label %bb1
+}
+
+attributes #0 = { nounwind }
author	Alfred Huang <alfred.j.huang@gmail.com>
	Fri, 14 Jul 2017 17:56:55 +0000 (17:56 +0000)
committer	Alfred Huang <alfred.j.huang@gmail.com>
	Fri, 14 Jul 2017 17:56:55 +0000 (17:56 +0000)
lib/Target/AMDGPU/SIInstrInfo.cpp		patch \| blob \| history
lib/Target/AMDGPU/SIInstrInfo.h		patch \| blob \| history
test/CodeGen/AMDGPU/move-to-valu-worklist.ll	[new file with mode: 0644]	patch \| blob