AMDGPU: Support folding FrameIndex operands

author Matt Arsenault <Matthew.Arsenault@amd.com>
Wed, 14 Sep 2016 15:51:33 +0000 (15:51 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Wed, 14 Sep 2016 15:51:33 +0000 (15:51 +0000)
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp
index b55dee68d51073055eee62e54105786fc7de25a7..e1257b1d33e29f0cf5a5ae5aa3f9650cef67f470 100644
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -48,24 +48,36 @@ public:
  
  struct FoldCandidate {
    MachineInstr *UseMI;
-  unsigned UseOpNo;
-  MachineOperand *OpToFold;
-  uint64_t ImmToFold;
+  union {
+    MachineOperand *OpToFold;
+    uint64_t ImmToFold;
+    int FrameIndexToFold;
+  };
+  unsigned char UseOpNo;
+  MachineOperand::MachineOperandType Kind;
  
    FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
-                UseMI(MI), UseOpNo(OpNo) {
-
+    UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()) {
      if (FoldOp->isImm()) {
-      OpToFold = nullptr;
        ImmToFold = FoldOp->getImm();
+    } else if (FoldOp->isFI()) {
+      FrameIndexToFold = FoldOp->getIndex();
      } else {
        assert(FoldOp->isReg());
        OpToFold = FoldOp;
      }
    }
  
+  bool isFI() const {
+    return Kind == MachineOperand::MO_FrameIndex;
+  }
+
    bool isImm() const {
-    return !OpToFold;
+    return Kind == MachineOperand::MO_Immediate;
+  }
+
+  bool isReg() const {
+    return Kind == MachineOperand::MO_Register;
    }
  };
  
@@ -107,6 +119,11 @@ static bool updateOperand(FoldCandidate &Fold,
      return true;
    }
  
+  if (Fold.isFI()) {
+    Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
+    return true;
+  }
+
    MachineOperand *New = Fold.OpToFold;
    if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
        TargetRegisterInfo::isVirtualRegister(New->getReg())) {
@@ -448,7 +465,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
  
        unsigned OpSize = TII->getOpSize(MI, 1);
        MachineOperand &OpToFold = MI.getOperand(1);
-      bool FoldingImm = OpToFold.isImm();
+      bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
  
        // FIXME: We could also be folding things like FrameIndexes and
        // TargetIndexes.
@@ -500,7 +517,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
        for (FoldCandidate &Fold : FoldList) {
          if (updateOperand(Fold, TRI)) {
            // Clear kill flags.
-          if (!Fold.isImm()) {
+          if (Fold.isReg()) {
              assert(Fold.OpToFold && Fold.OpToFold->isReg());
              // FIXME: Probably shouldn't bother trying to fold if not an
              // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
diff --git a/test/CodeGen/AMDGPU/local-stack-slot-bug.ll b/test/CodeGen/AMDGPU/local-stack-slot-bug.ll
index 6e6f289f5d6d6e6f8036af80a45b33db4a297e7a..eb554e2173059b584d05f7c0404506f80e18581d 100644
--- a/test/CodeGen/AMDGPU/local-stack-slot-bug.ll
+++ b/test/CodeGen/AMDGPU/local-stack-slot-bug.ll
@@ -7,10 +7,8 @@
  ;
  ; CHECK-LABEL: {{^}}main:
  ; CHECK: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
-; CHECK: v_mov_b32_e32 [[HI_CONST:v[0-9]+]], 0x200
-; CHECK: v_mov_b32_e32 [[LO_CONST:v[0-9]+]], 0
-; CHECK: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, [[BYTES]], [[HI_CONST]]
-; CHECK: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, [[BYTES]], [[LO_CONST]]
+; CHECK: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, 0x200, [[BYTES]]
+; CHECK: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, 0, [[BYTES]]
  ; CHECK: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
  ; CHECK: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
  define amdgpu_ps float @main(i32 %idx) {
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Wed, 14 Sep 2016 15:51:33 +0000 (15:51 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Wed, 14 Sep 2016 15:51:33 +0000 (15:51 +0000)
lib/Target/AMDGPU/SIFoldOperands.cpp		patch | blob | history
test/CodeGen/AMDGPU/local-stack-slot-bug.ll		patch | blob | history