[TargetTransformInfo] Refactor and improve getScalarizationOverhead()

author Jonas Paulsson <paulsson@linux.vnet.ibm.com>

Thu, 26 Jan 2017 07:03:25 +0000 (07:03 +0000)

committer Jonas Paulsson <paulsson@linux.vnet.ibm.com>

Thu, 26 Jan 2017 07:03:25 +0000 (07:03 +0000)
author Jonas Paulsson <paulsson@linux.vnet.ibm.com>
Thu, 26 Jan 2017 07:03:25 +0000 (07:03 +0000)
committer Jonas Paulsson <paulsson@linux.vnet.ibm.com>
Thu, 26 Jan 2017 07:03:25 +0000 (07:03 +0000)
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h

index 209f05c279d0354f01fd685d3dce2b407cc8e2c1..89f0b22a3bffd6de2a85e4a6de92f34dc60ba7df 100644 (file)
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -411,6 +411,11 @@ public:
    /// containing this constant value for the target.
    bool shouldBuildLookupTablesForConstant(Constant *C) const;
  
+  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+                                            unsigned VF) const;
+
    /// \brief Don't restrict interleaved unrolling to small loops.
    bool enableAggressiveInterleaving(bool LoopHasReductions) const;
  
@@ -743,6 +748,10 @@ public:
    virtual unsigned getJumpBufSize() = 0;
    virtual bool shouldBuildLookupTables() = 0;
    virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
+  virtual unsigned
+  getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
+  virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+                                                    unsigned VF) = 0;
    virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
    virtual bool enableInterleavedAccessVectorization() = 0;
    virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
@@ -933,6 +942,14 @@ public:
    bool shouldBuildLookupTablesForConstant(Constant *C) override {
      return Impl.shouldBuildLookupTablesForConstant(C);
    }
+  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
+    return Impl.getScalarizationOverhead(Ty, Insert, Extract);
+  }
+  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+                                            unsigned VF) {
+    return Impl.getOperandsScalarizationOverhead(Args, VF);
+  }
+
    bool enableAggressiveInterleaving(bool LoopHasReductions) override {
      return Impl.enableAggressiveInterleaving(LoopHasReductions);
    }
diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h

index cafc40723c9d64f37d957893e30aa9230a29bbfe..42cff56cd3931960b28e98f1919686ee3c4d95bc 100644 (file)
--- a/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -251,6 +251,13 @@ public:
    bool shouldBuildLookupTables() { return true; }
    bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }
  
+  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
+    return 0;
+  }
+
+  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+                                            unsigned VF) { return 0; }
+
    bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }
  
    bool enableInterleavedAccessVectorization() { return false; }
diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h

index 7efdbcccdef567bece74047dee6aadfee75880c1..f4cd21a88f11707c6496fade072444a03d219304 100644 (file)
--- a/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/include/llvm/CodeGen/BasicTTIImpl.h
@@ -42,24 +42,6 @@ private:
    typedef TargetTransformInfoImplCRTPBase<T> BaseT;
    typedef TargetTransformInfo TTI;
  
-  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
-  /// are set if the result needs to be inserted and/or extracted from vectors.
-  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
-    assert(Ty->isVectorTy() && "Can only scalarize vectors");
-    unsigned Cost = 0;
-
-    for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
-      if (Insert)
-        Cost += static_cast<T *>(this)
-                    ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
-      if (Extract)
-        Cost += static_cast<T *>(this)
-                    ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
-    }
-
-    return Cost;
-  }
-
    /// Estimate a cost of shuffle as a sequence of extract and insert
    /// operations.
    unsigned getPermuteShuffleOverhead(Type *Ty) {
@@ -301,6 +283,37 @@ public:
  
    unsigned getRegisterBitWidth(bool Vector) { return 32; }
  
+  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+  /// are set if the result needs to be inserted and/or extracted from vectors.
+  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
+    assert(Ty->isVectorTy() && "Can only scalarize vectors");
+    unsigned Cost = 0;
+
+    for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
+      if (Insert)
+        Cost += static_cast<T *>(this)
+                    ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
+      if (Extract)
+        Cost += static_cast<T *>(this)
+                    ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
+    }
+
+    return Cost;
+  }
+
+  /// Estimate the overhead of scalarizing an instruction's unique operands.
+  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+                                            unsigned VF) {
+    unsigned Cost = 0;
+    SmallPtrSet<const Value*, 4> UniqueOperands;
+    for (const Value *A : Args) {
+      if (UniqueOperands.insert(A).second)
+        Cost += getScalarizationOverhead(VectorType::get(A->getType(), VF),
+                                         false, true);
+    }
+    return Cost;
+  }
+
    unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
  
    unsigned getArithmeticInstrCost(
@@ -341,10 +354,17 @@ public:
        unsigned Num = Ty->getVectorNumElements();
        unsigned Cost = static_cast<T *>(this)
                            ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
-      // return the cost of multiple scalar invocation plus the cost of
-      // inserting
-      // and extracting the values.
-      return getScalarizationOverhead(Ty, true, true) + Num * Cost;
+      // Return the cost of multiple scalar invocations plus the cost of
+      // inserting and extracting the values.
+      unsigned TotCost = getScalarizationOverhead(Ty, true, false) + Num * Cost;
+      if (!Args.empty())
+        TotCost += getOperandsScalarizationOverhead(Args, Num);
+      else
+        // When no information on arguments is provided, we add the cost
+        // associated with one argument as a heuristic.
+        TotCost += getScalarizationOverhead(Ty, false, true);
+
+      return TotCost;
      }
  
      // We don't know anything about this scalar instruction.
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp

index 5c0d1aac1b98a4bdd42f405ea1bc718e96406ed5..2727c0c4795138ffd6dd763d7a5c2e0256cc9b96 100644 (file)
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -182,6 +182,17 @@ bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const
    return TTIImpl->shouldBuildLookupTablesForConstant(C);
  }
  
+unsigned TargetTransformInfo::
+getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const {
+  return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract);
+}
+
+unsigned TargetTransformInfo::
+getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+                                 unsigned VF) const {
+  return TTIImpl->getOperandsScalarizationOverhead(Args, VF);
+}
+
  bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
    return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
  }
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h

index 18287ed6653f7840df02f26b161a9bb32886ef35..a7f7fb9457aa6aaa99c88783895e3d7f4a090f4b 100644 (file)
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -34,10 +34,6 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
    const AArch64Subtarget *ST;
    const AArch64TargetLowering *TLI;
  
-  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
-  /// are set if the result needs to be inserted and/or extracted from vectors.
-  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
-
    const AArch64Subtarget *getST() const { return ST; }
    const AArch64TargetLowering *getTLI() const { return TLI; }
  
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h

index 3c83cd92a61a0269c852958b2f45ce091be15072..5ee1f7c4b0e4a7a85c7f441dd84dbdc8822364c7 100644 (file)
--- a/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -33,10 +33,6 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
    const ARMSubtarget *ST;
    const ARMTargetLowering *TLI;
  
-  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
-  /// are set if the result needs to be inserted and/or extracted from vectors.
-  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
-
    const ARMSubtarget *getST() const { return ST; }
    const ARMTargetLowering *getTLI() const { return TLI; }
  
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp

index bd7fa9ca617ca9b59317e2c413a79f852bcc24aa..3ebfdd0ede7867f598f8c1a14d419a80bceab539 100644 (file)
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1577,20 +1577,6 @@ int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
    return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost;
  }
  
-int X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
-  assert (Ty->isVectorTy() && "Can only scalarize vectors");
-  int Cost = 0;
-
-  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
-    if (Insert)
-      Cost += getVectorInstrCost(Instruction::InsertElement, Ty, i);
-    if (Extract)
-      Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, i);
-  }
-
-  return Cost;
-}
-
  int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                                  unsigned AddressSpace) {
    // Handle non-power-of-two vectors such as <3 x float>
diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h

index ecaaf951cff7aaf4d553b377e95f5123f329dfb8..63a1493002fff250f79120ff305be32e7a02bc89 100644 (file)
--- a/lib/Target/X86/X86TargetTransformInfo.h
+++ b/lib/Target/X86/X86TargetTransformInfo.h
@@ -33,8 +33,6 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
    const X86Subtarget *ST;
    const X86TargetLowering *TLI;
  
-  int getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
-
    const X86Subtarget *getST() const { return ST; }
    const X86TargetLowering *getTLI() const { return TLI; }
  
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp

index 6ffe29a5c9814ec539fa10d1568e157de4db5ad2..ce3f3b0afeff258df95441a302d7e7eebfc852c8 100644 (file)
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3598,37 +3598,18 @@ static Value *addFastMathFlag(Value *V) {
    return V;
  }
  
-/// \brief Estimate the overhead of scalarizing a value based on its type.
-/// Insert and Extract are set if the result needs to be inserted and/or
-/// extracted from vectors.
-static unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract,
-                                         const TargetTransformInfo &TTI) {
-  if (Ty->isVoidTy())
-    return 0;
-
-  assert(Ty->isVectorTy() && "Can only scalarize vectors");
-  unsigned Cost = 0;
-
-  for (unsigned I = 0, E = Ty->getVectorNumElements(); I < E; ++I) {
-    if (Extract)
-      Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, Ty, I);
-    if (Insert)
-      Cost += TTI.getVectorInstrCost(Instruction::InsertElement, Ty, I);
-  }
-
-  return Cost;
-}
-
  /// \brief Estimate the overhead of scalarizing an Instruction based on the
  /// types of its operands and return value.
  static unsigned getScalarizationOverhead(SmallVectorImpl<Type *> &OpTys,
                                           Type *RetTy,
                                           const TargetTransformInfo &TTI) {
-  unsigned ScalarizationCost =
-      getScalarizationOverhead(RetTy, true, false, TTI);
+  unsigned ScalarizationCost = 0;
+
+  if (!RetTy->isVoidTy())
+    ScalarizationCost += TTI.getScalarizationOverhead(RetTy, true, false);
  
    for (Type *Ty : OpTys)
-    ScalarizationCost += getScalarizationOverhead(Ty, false, true, TTI);
+    ScalarizationCost += TTI.getScalarizationOverhead(Ty, false, true);
  
    return ScalarizationCost;
  }
@@ -3640,14 +3621,15 @@ static unsigned getScalarizationOverhead(Instruction *I, unsigned VF,
    if (VF == 1)
      return 0;
  
+  unsigned Cost = 0;
    Type *RetTy = ToVectorTy(I->getType(), VF);
+  if (!RetTy->isVoidTy())
+    Cost += TTI.getScalarizationOverhead(RetTy, true, false);
  
-  SmallVector<Type *, 4> OpTys;
-  unsigned OperandsNum = I->getNumOperands();
-  for (unsigned OpInd = 0; OpInd < OperandsNum; ++OpInd)
-    OpTys.push_back(ToVectorTy(I->getOperand(OpInd)->getType(), VF));
+  SmallVector<const Value *, 4> Operands(I->operand_values());
+  Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
  
-  return getScalarizationOverhead(OpTys, RetTy, TTI);
+  return Cost;
  }
  
  // Estimate cost of a call instruction CI if it were vectorized with factor VF.
@@ -6713,8 +6695,8 @@ int LoopVectorizationCostModel::computePredInstDiscount(
      // Compute the scalarization overhead of needed insertelement instructions
      // and phi nodes.
      if (Legal->isScalarWithPredication(I) && !I->getType()->isVoidTy()) {
-      ScalarCost += getScalarizationOverhead(ToVectorTy(I->getType(), VF), true,
-                                             false, TTI);
+      ScalarCost += TTI.getScalarizationOverhead(ToVectorTy(I->getType(), VF),
+                                                 true, false);
        ScalarCost += VF * TTI.getCFInstrCost(Instruction::PHI);
      }
  
@@ -6729,8 +6711,8 @@ int LoopVectorizationCostModel::computePredInstDiscount(
          if (canBeScalarized(J))
            Worklist.push_back(J);
          else if (needsExtract(J))
-          ScalarCost += getScalarizationOverhead(ToVectorTy(J->getType(), VF),
-                                                 false, true, TTI);
+          ScalarCost += TTI.getScalarizationOverhead(
+                              ToVectorTy(J->getType(),VF), false, true);
        }
  
      // Scale the total scalar cost by block probability.
author	Jonas Paulsson <paulsson@linux.vnet.ibm.com>
	Thu, 26 Jan 2017 07:03:25 +0000 (07:03 +0000)
committer	Jonas Paulsson <paulsson@linux.vnet.ibm.com>
	Thu, 26 Jan 2017 07:03:25 +0000 (07:03 +0000)
include/llvm/Analysis/TargetTransformInfo.h		patch \| blob \| history
include/llvm/Analysis/TargetTransformInfoImpl.h		patch \| blob \| history
include/llvm/CodeGen/BasicTTIImpl.h		patch \| blob \| history
lib/Analysis/TargetTransformInfo.cpp		patch \| blob \| history
lib/Target/AArch64/AArch64TargetTransformInfo.h		patch \| blob \| history
lib/Target/ARM/ARMTargetTransformInfo.h		patch \| blob \| history
lib/Target/X86/X86TargetTransformInfo.cpp		patch \| blob \| history
lib/Target/X86/X86TargetTransformInfo.h		patch \| blob \| history
lib/Transforms/Vectorize/LoopVectorize.cpp		patch \| blob \| history