[SystemZ] Modelling of costs of divisions with a constant power of 2.

author Jonas Paulsson <paulsson@linux.vnet.ibm.com>

Wed, 17 May 2017 12:46:26 +0000 (12:46 +0000)

committer Jonas Paulsson <paulsson@linux.vnet.ibm.com>

Wed, 17 May 2017 12:46:26 +0000 (12:46 +0000)
author Jonas Paulsson <paulsson@linux.vnet.ibm.com>
Wed, 17 May 2017 12:46:26 +0000 (12:46 +0000)
committer Jonas Paulsson <paulsson@linux.vnet.ibm.com>
Wed, 17 May 2017 12:46:26 +0000 (12:46 +0000)
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp

index f56b238f91e66d01cee5036122c4ce14c4331a5e..6a3dc6799c43f105e2ac7c3088713c826dde3889 100644 (file)
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -325,6 +325,30 @@ int SystemZTTIImpl::getArithmeticInstrCost(
  
    unsigned ScalarBits = Ty->getScalarSizeInBits();
  
+  // Div with a constant which is a power of 2 will be converted by
+  // DAGCombiner to use shifts. With vector shift-element instructions, a
+  // vector sdiv costs about as much as a scalar one.
+  const unsigned SDivCostEstimate = 4;
+  bool SDivPow2 = false;
+  bool UDivPow2 = false;
+  if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv) &&
+      Args.size() == 2) {
+    const ConstantInt *CI = nullptr;
+    if (const Constant *C = dyn_cast<Constant>(Args[1])) {
+      if (C->getType()->isVectorTy())
+        CI = dyn_cast_or_null<const ConstantInt>(C->getSplatValue());
+      else
+        CI = dyn_cast<const ConstantInt>(C);
+    }
+    if (CI != nullptr &&
+        (CI->getValue().isPowerOf2() || (-CI->getValue()).isPowerOf2())) {
+      if (Opcode == Instruction::SDiv)
+        SDivPow2 = true;
+      else
+        UDivPow2 = true;
+    }
+  }
+
    if (Ty->isVectorTy()) {
      assert (ST->hasVector() && "getArithmeticInstrCost() called with vector type.");
      unsigned VF = Ty->getVectorNumElements();
@@ -333,10 +357,13 @@ int SystemZTTIImpl::getArithmeticInstrCost(
      // These vector operations are custom handled, but are still supported
      // with one instruction per vector, regardless of element size.
      if (Opcode == Instruction::Shl || Opcode == Instruction::LShr ||
-        Opcode == Instruction::AShr) {
+        Opcode == Instruction::AShr || UDivPow2) {
        return NumVectors;
      }
  
+    if (SDivPow2)
+      return (NumVectors * SDivCostEstimate);
+
      // These FP operations are supported with a single vector instruction for
      // double (base implementation assumes float generally costs 2). For
      // FP128, the scalar cost is 1, and there is no overhead since the values
@@ -395,6 +422,11 @@ int SystemZTTIImpl::getArithmeticInstrCost(
        // 2 * ipm sequences ; xor ; shift ; compare
        return 7;
  
+    if (UDivPow2)
+      return 1;
+    if (SDivPow2)
+      return SDivCostEstimate;
+
      // An extra extension for narrow types is needed.
      if ((Opcode == Instruction::SDiv || Opcode == Instruction::SRem))
        // sext of op(s) for narrow types
diff --git a/test/Analysis/CostModel/SystemZ/div-pow2.ll b/test/Analysis/CostModel/SystemZ/div-pow2.ll

new file mode 100644 (file)

index 0000000..9ef2dd7
--- /dev/null
+++ b/test/Analysis/CostModel/SystemZ/div-pow2.ll
@@ -0,0 +1,154 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
+
+; Scalar sdiv by a positive or negated power of 2: expected cost is 4.
+
+define i64 @fun0(i64 %a) {
+  %r = sdiv i64 %a, 2
+  ret i64 %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv i64 %a, 2
+}
+
+define i64 @fun1(i64 %a) {
+  %r = sdiv i64 %a, -4
+  ret i64 %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv i64 %a, -4
+}
+
+define i32 @fun2(i32 %a) {
+  %r = sdiv i32 %a, 8
+  ret i32 %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv i32 %a, 8
+}
+
+define i32 @fun3(i32 %a) {
+  %r = sdiv i32 %a, -16
+  ret i32 %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv i32 %a, -16
+}
+
+define i16 @fun4(i16 %a) {
+  %r = sdiv i16 %a, 32
+  ret i16 %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv i16 %a, 32
+}
+
+define i16 @fun5(i16 %a) {
+  %r = sdiv i16 %a, -64
+  ret i16 %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv i16 %a, -64
+}
+
+define i8 @fun6(i8 %a) {
+  %r = sdiv i8 %a, 64
+  ret i8 %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv i8 %a, 64
+}
+
+define i8 @fun7(i8 %a) {
+  %r = sdiv i8 %a, -128
+  ret i8 %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv i8 %a, -128
+}
+
+
+; Vector sdiv by a splat of a positive or negated power of 2: expected cost is 4.
+
+define <2 x i64> @fun8(<2 x i64> %a) {
+  %r = sdiv <2 x i64> %a, <i64 2, i64 2>
+  ret <2 x i64> %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv <2 x i64> %a, <i64 2, i64 2>
+}
+
+define <2 x i64> @fun9(<2 x i64> %a) {
+  %r = sdiv <2 x i64> %a, <i64 -4, i64 -4>
+  ret <2 x i64> %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv <2 x i64> %a, <i64 -4, i64 -4>
+}
+
+define <4 x i32> @fun10(<4 x i32> %a) {
+  %r = sdiv <4 x i32> %a, <i32 8, i32 8, i32 8, i32 8>
+  ret <4 x i32> %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv <4 x i32> %a, <i32 8, i32 8, i32 8, i32 8>
+}
+
+define <4 x i32> @fun11(<4 x i32> %a) {
+  %r = sdiv <4 x i32> %a, <i32 -16, i32 -16, i32 -16, i32 -16>
+  ret <4 x i32> %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv <4 x i32> %a, <i32 -16
+}
+
+define <8 x i16> @fun12(<8 x i16> %a) {
+  %r = sdiv <8 x i16> %a, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
+  ret <8 x i16> %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv <8 x i16> %a, <i16 32
+}
+
+define <8 x i16> @fun13(<8 x i16> %a) {
+  %r = sdiv <8 x i16> %a, <i16 -64, i16 -64, i16 -64, i16 -64, i16 -64, i16 -64, i16 -64, i16 -64>
+  ret <8 x i16> %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv <8 x i16> %a, <i16 -64
+}
+
+define <16 x i8> @fun14(<16 x i8> %a) {
+  %r = sdiv <16 x i8> %a, <i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64>
+  ret <16 x i8> %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv <16 x i8> %a, <i8 64
+}
+
+define <16 x i8> @fun15(<16 x i8> %a) {
+  %r = sdiv <16 x i8> %a, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
+  ret <16 x i8> %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv <16 x i8> %a, <i8 -128
+}
+
+; Scalar udiv by a power of 2: expected cost is 1 (the CHECK line for i8 128 matches it printed as -128).
+
+define i64 @fun16(i64 %a) {
+  %r = udiv i64 %a, 2
+  ret i64 %r
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %r = udiv i64 %a, 2
+}
+
+define i32 @fun17(i32 %a) {
+  %r = udiv i32 %a, 8
+  ret i32 %r
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %r = udiv i32 %a, 8
+}
+
+define i16 @fun18(i16 %a) {
+  %r = udiv i16 %a, 32
+  ret i16 %r
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %r = udiv i16 %a, 32
+}
+
+define i8 @fun19(i8 %a) {
+  %r = udiv i8 %a, 128
+  ret i8 %r
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %r = udiv i8 %a, -128
+}
+
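+; Vector udiv by a splat of a power of 2: expected cost is 1 (the CHECK line for i8 128 matches it printed as -128).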
+
+define <2 x i64> @fun20(<2 x i64> %a) {
+  %r = udiv <2 x i64> %a, <i64 2, i64 2>
+  ret <2 x i64> %r
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %r = udiv <2 x i64> %a, <i64 2
+}
+
+define <4 x i32> @fun21(<4 x i32> %a) {
+  %r = udiv <4 x i32> %a, <i32 8, i32 8, i32 8, i32 8>
+  ret <4 x i32> %r
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %r = udiv <4 x i32> %a, <i32 8
+}
+
+define <8 x i16> @fun22(<8 x i16> %a) {
+  %r = udiv <8 x i16> %a, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
+  ret <8 x i16> %r
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %r = udiv <8 x i16> %a, <i16 32
+}
+
+define <16 x i8> @fun23(<16 x i8> %a) {
+  %r = udiv <16 x i8> %a, <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>
+  ret <16 x i8> %r
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %r = udiv <16 x i8> %a, <i8 -128
+}
author	Jonas Paulsson <paulsson@linux.vnet.ibm.com>
	Wed, 17 May 2017 12:46:26 +0000 (12:46 +0000)
committer	Jonas Paulsson <paulsson@linux.vnet.ibm.com>
	Wed, 17 May 2017 12:46:26 +0000 (12:46 +0000)
lib/Target/SystemZ/SystemZTargetTransformInfo.cpp		patch | blob | history
test/Analysis/CostModel/SystemZ/div-pow2.ll	[new file with mode: 0644]	patch | blob