R600/SI: Only fold immediates that have one use

author Tom Stellard <thomas.stellard@amd.com>

Wed, 7 Jan 2015 22:18:27 +0000 (22:18 +0000)

committer Tom Stellard <thomas.stellard@amd.com>

Wed, 7 Jan 2015 22:18:27 +0000 (22:18 +0000)
author Tom Stellard <thomas.stellard@amd.com>
Wed, 7 Jan 2015 22:18:27 +0000 (22:18 +0000)
committer Tom Stellard <thomas.stellard@amd.com>
Wed, 7 Jan 2015 22:18:27 +0000 (22:18 +0000)
diff --git a/lib/Target/R600/SIFoldOperands.cpp b/lib/Target/R600/SIFoldOperands.cpp

index ddb285213d2a58af2c4d585a5ff2c28609bef817..545905ba64e1c2d3b305c23e7bd307fcda25cd35 100644 (file)
--- a/lib/Target/R600/SIFoldOperands.cpp
+++ b/lib/Target/R600/SIFoldOperands.cpp
@@ -138,6 +138,14 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
          continue;
  
        MachineOperand &OpToFold = MI.getOperand(1);
+      bool FoldingImm = OpToFold.isImm() || OpToFold.isFPImm();
+
+      // Folding immediates with more than one use will increase program size.
+      // FIXME: This will also reduce register usage, which may be better
+      // in some cases.  A better heuristic is needed.
+      if (FoldingImm && !TII->isInlineConstant(OpToFold) &&
+          !MRI.hasOneUse(MI.getOperand(0).getReg()))
+        continue;
  
        // FIXME: Fold operands with subregs.
        if (OpToFold.isReg() &&
@@ -158,7 +166,6 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
            continue;
          }
  
-        bool FoldingImm = OpToFold.isImm() || OpToFold.isFPImm();
          APInt Imm;
  
          if (FoldingImm) {
diff --git a/test/CodeGen/R600/operand-folding.ll b/test/CodeGen/R600/operand-folding.ll

index 4693ff990612ab02f672e264226a62eb77c7a7ee..c430c5a0dcc89f873e0afcecb2e062cbbbf1d424 100644 (file)
--- a/test/CodeGen/R600/operand-folding.ll
+++ b/test/CodeGen/R600/operand-folding.ll
@@ -53,5 +53,40 @@ entry:
    ret void
  }
  
+; Inline constants should always be folded.
+
+; CHECK-LABEL: {{^}}vector_inline:
+; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}
+; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}
+; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}
+; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}
+
+define void @vector_inline(<4 x i32> addrspace(1)* %out) {
+entry:
+  %tmp0 = call i32 @llvm.r600.read.tidig.x()
+  %tmp1 = add i32 %tmp0, 1
+  %tmp2 = add i32 %tmp0, 2
+  %tmp3 = add i32 %tmp0, 3
+  %vec0 = insertelement <4 x i32> undef, i32 %tmp0, i32 0
+  %vec1 = insertelement <4 x i32> %vec0, i32 %tmp1, i32 1
+  %vec2 = insertelement <4 x i32> %vec1, i32 %tmp2, i32 2
+  %vec3 = insertelement <4 x i32> %vec2, i32 %tmp3, i32 3
+  %tmp4 = xor <4 x i32> <i32 5, i32 5, i32 5, i32 5>, %vec3
+  store <4 x i32> %tmp4, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; Immediates with one use should be folded.
+; CHECK-LABEL: {{^}}imm_one_use:
+; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 0x64, v{{[0-9]+}}
+
+define void @imm_one_use(i32 addrspace(1)* %out) {
+entry:
+  %tmp0 = call i32 @llvm.r600.read.tidig.x()
+  %tmp1 = xor i32 %tmp0, 100
+  store i32 %tmp1, i32 addrspace(1)* %out
+  ret void
+}
+
  declare i32 @llvm.r600.read.tidig.x() #0
  attributes #0 = { readnone }
author	Tom Stellard <thomas.stellard@amd.com>
	Wed, 7 Jan 2015 22:18:27 +0000 (22:18 +0000)
committer	Tom Stellard <thomas.stellard@amd.com>
	Wed, 7 Jan 2015 22:18:27 +0000 (22:18 +0000)
lib/Target/R600/SIFoldOperands.cpp		patch \| blob \| history
test/CodeGen/R600/operand-folding.ll		patch \| blob \| history