From 2460452d77d0ceb376158385e096102c46082e2f Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Wed, 17 May 2017 12:46:26 +0000 Subject: [PATCH] [SystemZ] Modelling of costs of divisions with a constant power of 2. Such divisions will eventually be implemented with shifts which should be reflected in the cost function. Review: Ulrich Weigand git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@303254 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../SystemZ/SystemZTargetTransformInfo.cpp | 34 +++- test/Analysis/CostModel/SystemZ/div-pow2.ll | 154 ++++++++++++++++++ 2 files changed, 187 insertions(+), 1 deletion(-) create mode 100644 test/Analysis/CostModel/SystemZ/div-pow2.ll diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index f56b238f91e..6a3dc6799c4 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -325,6 +325,30 @@ int SystemZTTIImpl::getArithmeticInstrCost( unsigned ScalarBits = Ty->getScalarSizeInBits(); + // Div with a constant which is a power of 2 will be converted by + // DAGCombiner to use shifts. With vector shift-element instructions, a + // vector sdiv costs about as much as a scalar one. 
+ const unsigned SDivCostEstimate = 4; + bool SDivPow2 = false; + bool UDivPow2 = false; + if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv) && + Args.size() == 2) { + const ConstantInt *CI = nullptr; + if (const Constant *C = dyn_cast<Constant>(Args[1])) { + if (C->getType()->isVectorTy()) + CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue()); + else + CI = dyn_cast<ConstantInt>(C); + } + if (CI != nullptr && + (CI->getValue().isPowerOf2() || (-CI->getValue()).isPowerOf2())) { + if (Opcode == Instruction::SDiv) + SDivPow2 = true; + else + UDivPow2 = true; + } + } + if (Ty->isVectorTy()) { assert (ST->hasVector() && "getArithmeticInstrCost() called with vector type."); unsigned VF = Ty->getVectorNumElements(); @@ -333,10 +357,13 @@ int SystemZTTIImpl::getArithmeticInstrCost( // These vector operations are custom handled, but are still supported // with one instruction per vector, regardless of element size. if (Opcode == Instruction::Shl || Opcode == Instruction::LShr || - Opcode == Instruction::AShr) { + Opcode == Instruction::AShr || UDivPow2) { return NumVectors; } + if (SDivPow2) + return (NumVectors * SDivCostEstimate); + // These FP operations are supported with a single vector instruction for // double (base implementation assumes float generally costs 2). For // FP128, the scalar cost is 1, and there is no overhead since the values @@ -395,6 +422,11 @@ int SystemZTTIImpl::getArithmeticInstrCost( // 2 * ipm sequences ; xor ; shift ; compare return 7; + if (UDivPow2) + return 1; + if (SDivPow2) + return SDivCostEstimate; + // An extra extension for narrow types is needed. 
if ((Opcode == Instruction::SDiv || Opcode == Instruction::SRem)) // sext of op(s) for narrow types diff --git a/test/Analysis/CostModel/SystemZ/div-pow2.ll b/test/Analysis/CostModel/SystemZ/div-pow2.ll new file mode 100644 index 00000000000..9ef2dd71e8f --- /dev/null +++ b/test/Analysis/CostModel/SystemZ/div-pow2.ll @@ -0,0 +1,154 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s + +; Scalar sdiv + +define i64 @fun0(i64 %a) { + %r = sdiv i64 %a, 2 + ret i64 %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv i64 %a, 2 +} + +define i64 @fun1(i64 %a) { + %r = sdiv i64 %a, -4 + ret i64 %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv i64 %a, -4 +} + +define i32 @fun2(i32 %a) { + %r = sdiv i32 %a, 8 + ret i32 %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv i32 %a, 8 +} + +define i32 @fun3(i32 %a) { + %r = sdiv i32 %a, -16 + ret i32 %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv i32 %a, -16 +} + +define i16 @fun4(i16 %a) { + %r = sdiv i16 %a, 32 + ret i16 %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv i16 %a, 32 +} + +define i16 @fun5(i16 %a) { + %r = sdiv i16 %a, -64 + ret i16 %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv i16 %a, -64 +} + +define i8 @fun6(i8 %a) { + %r = sdiv i8 %a, 64 + ret i8 %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv i8 %a, 64 +} + +define i8 @fun7(i8 %a) { + %r = sdiv i8 %a, -128 + ret i8 %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv i8 %a, -128 +} + + +; Vector sdiv + +define <2 x i64> @fun8(<2 x i64> %a) { + %r = sdiv <2 x i64> %a, + ret <2 x i64> %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <2 x i64> %a, +} + +define <2 x i64> @fun9(<2 x i64> %a) { + %r = sdiv <2 x i64> %a, + ret <2 x i64> %r +; CHECK: 
Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <2 x i64> %a, +} + +define <4 x i32> @fun10(<4 x i32> %a) { + %r = sdiv <4 x i32> %a, + ret <4 x i32> %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <4 x i32> %a, +} + +define <4 x i32> @fun11(<4 x i32> %a) { + %r = sdiv <4 x i32> %a, + ret <4 x i32> %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <4 x i32> %a, @fun12(<8 x i16> %a) { + %r = sdiv <8 x i16> %a, + ret <8 x i16> %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <8 x i16> %a, @fun13(<8 x i16> %a) { + %r = sdiv <8 x i16> %a, + ret <8 x i16> %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <8 x i16> %a, @fun14(<16 x i8> %a) { + %r = sdiv <16 x i8> %a, + ret <16 x i8> %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <16 x i8> %a, @fun15(<16 x i8> %a) { + %r = sdiv <16 x i8> %a, + ret <16 x i8> %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <16 x i8> %a, @fun20(<2 x i64> %a) { + %r = udiv <2 x i64> %a, + ret <2 x i64> %r +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <2 x i64> %a, @fun21(<4 x i32> %a) { + %r = udiv <4 x i32> %a, + ret <4 x i32> %r +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <4 x i32> %a, @fun22(<8 x i16> %a) { + %r = udiv <8 x i16> %a, + ret <8 x i16> %r +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <8 x i16> %a, @fun23(<16 x i8> %a) { + %r = udiv <16 x i8> %a, + ret <16 x i8> %r +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <16 x i8> %a,