/// ((v0+v2), (v1+v3), undef, undef)
int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) const;
- /// \returns The cost of Intrinsic instructions. Types analysis only.
+ /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
+ /// Three cases are handled: 1. a scalar instruction, 2. a vector
+ /// instruction, and 3. a scalar instruction that is to be vectorized
+ /// with factor VF.
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys, FastMathFlags FMF) const;
+ ArrayRef<Value *> Args, FastMathFlags FMF,
+ unsigned VF = 1) const;
- /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
+ /// \returns The cost of Intrinsic instructions. Types analysis only.
+ /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
+ /// arguments and the return value will be computed based on types.
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF) const;
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed = UINT_MAX) const;
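A minimal sketch of the two call styles (hypothetical client code, not part of
the patch): a vectorizer passes the scalar return type, the actual arguments,
and the vectorization factor, while a caller that already holds vector types
passes them directly and leaves ScalarizationCostPassed at its UINT_MAX
default so the overhead is derived from the types:

  // Vectorizer-style query: scalar call, to be widened by VF = 4.
  SmallVector<Value *, 4> Args(CI->arg_operands());
  int VecCost = TTI.getIntrinsicInstrCost(ID, CI->getType(), Args, FMF, 4);

  // Type-based query: vector return and operand types already in hand.
  int Cost = TTI.getIntrinsicInstrCost(ID, VecRetTy, VecTys, FMF);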
/// \returns The cost of Call instructions.
int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
virtual int getReductionCost(unsigned Opcode, Type *Ty,
bool IsPairwiseForm) = 0;
virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys,
- FastMathFlags FMF) = 0;
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed) = 0;
virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args,
- FastMathFlags FMF) = 0;
+ ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0;
virtual int getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys) = 0;
virtual unsigned getNumberOfParts(Type *Tp) = 0;
return Impl.getReductionCost(Opcode, Ty, IsPairwiseForm);
}
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
- FastMathFlags FMF) override {
- return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF);
+ FastMathFlags FMF, unsigned ScalarizationCostPassed) override {
+ return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
+ ScalarizationCostPassed);
}
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args,
- FastMathFlags FMF) override {
- return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF);
+ ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override {
+ return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
}
int getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys) override {
}
unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys, FastMathFlags FMF) {
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed) {
return 1;
}
unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF) {
+ ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) {
return 1;
}
return Cost;
}
- /// Estimate the overhead of scalarizing an instructions unique operands.
+ /// Estimate the overhead of scalarizing an instruction's unique
+ /// non-constant operands. The types of the arguments are ordinarily
+ /// scalar, in which case the costs are multiplied by VF. Vector
+ /// arguments are allowed only if VF is 1.
unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
unsigned VF) {
unsigned Cost = 0;
SmallPtrSet<const Value*, 4> UniqueOperands;
for (const Value *A : Args) {
- if (UniqueOperands.insert(A).second)
- Cost += getScalarizationOverhead(VectorType::get(A->getType(), VF),
- false, true);
+ if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
+ Type *VecTy = nullptr;
+ if (A->getType()->isVectorTy()) {
+ assert(VF == 1 && "Vector argument passed with VF > 1");
+ VecTy = A->getType();
+ } else
+ VecTy = VectorType::get(A->getType(), VF);
+
+ Cost += getScalarizationOverhead(VecTy, false, true);
+ }
}
+
return Cost;
}
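As a concrete illustration (hypothetical values): with VF = 4 and
Args = {%a, %a, float 2.0}, only the unique non-constant operand %a is
counted, so the loop above reduces to a single query

  Cost += getScalarizationOverhead(VectorType::get(A->getType(), 4),
                                   /*Insert=*/false, /*Extract=*/true);

while the duplicate use and the constant contribute nothing.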
return Cost;
}
- /// Get intrinsic cost based on arguments
+ /// Get intrinsic cost based on arguments.
unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF) {
+ ArrayRef<Value *> Args, FastMathFlags FMF,
+ unsigned VF = 1) {
+ unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
+ assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
+
switch (IID) {
default: {
+ // Assume that we need to scalarize this intrinsic.
SmallVector<Type *, 4> Types;
- for (Value *Op : Args)
- Types.push_back(Op->getType());
- return static_cast<T *>(this)->getIntrinsicInstrCost(IID, RetTy, Types,
- FMF);
+ for (Value *Op : Args) {
+ Type *OpTy = Op->getType();
+ assert((VF == 1 || !OpTy->isVectorTy()) && "Vector operand with VF > 1");
+ Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF));
+ }
+
+ if (VF > 1 && !RetTy->isVoidTy())
+ RetTy = VectorType::get(RetTy, VF);
+
+ // Compute the scalarization overhead based on Args for a vector
+ // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
+ // the cost model will pass a vector RetTy with VF = 1.
+ unsigned ScalarizationCost = UINT_MAX;
+ if (RetVF > 1 || VF > 1) {
+ ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
+ ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
+ }
+
+ return static_cast<T *>(this)->
+ getIntrinsicInstrCost(IID, RetTy, Types, FMF, ScalarizationCost);
}
case Intrinsic::masked_scatter: {
+ assert(VF == 1 && "Can't vectorize types here.");
Value *Mask = Args[3];
bool VarMask = !isa<Constant>(Mask);
unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
Alignment);
}
case Intrinsic::masked_gather: {
+ assert(VF == 1 && "Can't vectorize types here.");
Value *Mask = Args[2];
bool VarMask = !isa<Constant>(Mask);
unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
}
}
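For example (an illustrative walk through the default case, with assumed
inputs): costing a scalar call to llvm.sqrt.f32 at VF = 4 widens the operand
types and RetTy to <4 x float>, precomputes the insert/extract overhead from
the real arguments, and forwards it to the type-based overload:

  // Effective expansion for IID = Intrinsic::sqrt, RetTy = float, VF = 4.
  unsigned ScalarizationCost =
      getScalarizationOverhead(VectorType::get(RetTy, 4), true, false) +
      getOperandsScalarizationOverhead(Args, 4);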
- /// Get intrinsic cost based on argument types
+ /// Get intrinsic cost based on argument types.
+ /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
+ /// arguments and the return value will be computed based on types.
unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> Tys, FastMathFlags FMF) {
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed = UINT_MAX) {
SmallVector<unsigned, 2> ISDs;
unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
switch (IID) {
default: {
// Assume that we need to scalarize this intrinsic.
- unsigned ScalarizationCost = 0;
+ unsigned ScalarizationCost = ScalarizationCostPassed;
unsigned ScalarCalls = 1;
Type *ScalarRetTy = RetTy;
if (RetTy->isVectorTy()) {
- ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
+ if (ScalarizationCostPassed == UINT_MAX)
+ ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
ScalarRetTy = RetTy->getScalarType();
}
for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
Type *Ty = Tys[i];
if (Ty->isVectorTy()) {
- ScalarizationCost += getScalarizationOverhead(Ty, false, true);
+ if (ScalarizationCostPassed == UINT_MAX)
+ ScalarizationCost += getScalarizationOverhead(Ty, false, true);
ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
Ty = Ty->getScalarType();
}
// this will emit a costly libcall, adding call overhead and spills. Make it
// very expensive.
if (RetTy->isVectorTy()) {
- unsigned ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
+ unsigned ScalarizationCost = ((ScalarizationCostPassed != UINT_MAX) ?
+ ScalarizationCostPassed : getScalarizationOverhead(RetTy, true, false));
unsigned ScalarCalls = RetTy->getVectorNumElements();
SmallVector<Type *, 4> ScalarTys;
for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
IID, RetTy->getScalarType(), ScalarTys, FMF);
for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
if (Tys[i]->isVectorTy()) {
- ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
+ if (ScalarizationCostPassed == UINT_MAX)
+ ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
}
}
}
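A short sketch of the contract (hypothetical numbers): a caller that has
already computed the scalarization overhead from the real operands, as the
pairing code later in this patch does, passes it through and the type-based
recomputation above is skipped:

  unsigned Cost = getIntrinsicInstrCost(IID, VecRetTy, VecTys, FMF,
                                        /*ScalarizationCostPassed=*/12);

With the UINT_MAX default, behaviour is unchanged and the overhead is
derived from RetTy and Tys as before.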
case Instruction::Call:
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- SmallVector<Value *, 4> Args;
- for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J)
- Args.push_back(II->getArgOperand(J));
+ SmallVector<Value *, 4> Args(II->arg_operands());
FastMathFlags FMF;
if (auto *FPMO = dyn_cast<FPMathOperator>(II))
}
int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys,
- FastMathFlags FMF) const {
- int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF);
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed) const {
+ int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
+ ScalarizationCostPassed);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args,
- FastMathFlags FMF) const {
- int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF);
+ ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) const {
+ int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
}
int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> Tys, FastMathFlags FMF) {
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed) {
// Costs should match the codegen from:
// BITREVERSE: llvm\test\CodeGen\X86\vector-bitreverse.ll
// BSWAP: llvm\test\CodeGen\X86\bswap-vector.ll
if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
return LT.first * Entry->Cost;
- return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF);
+ return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF,
+ ScalarizationCostPassed);
}
int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF) {
- return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF);
+ ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) {
+ return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF, VF);
}
int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
const SCEV *Ptr);
int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> Tys, FastMathFlags FMF);
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed = UINT_MAX);
int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF);
+ ArrayRef<Value *> Args, FastMathFlags FMF,
+ unsigned VF = 1);
int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm);
FastMathFlags FMFCI;
if (auto *FPMOCI = dyn_cast<FPMathOperator>(CI))
FMFCI = FPMOCI->getFastMathFlags();
- SmallVector<Type*, 4> Tys;
- for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
- Tys.push_back(CI->getArgOperand(i)->getType());
- unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, Tys, FMFCI);
-
- Tys.clear();
+ SmallVector<Value *, 4> IArgs(CI->arg_operands());
+ unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, IArgs, FMFCI);
CallInst *CJ = cast<CallInst>(J);
FastMathFlags FMFCJ;
if (auto *FPMOCJ = dyn_cast<FPMathOperator>(CJ))
FMFCJ = FPMOCJ->getFastMathFlags();
- for (unsigned i = 0, ie = CJ->getNumArgOperands(); i != ie; ++i)
- Tys.push_back(CJ->getArgOperand(i)->getType());
- unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, Tys, FMFCJ);
- Tys.clear();
+ SmallVector<Value *, 4> JArgs(CJ->arg_operands());
+ unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, JArgs, FMFCJ);
assert(CI->getNumArgOperands() == CJ->getNumArgOperands() &&
"Intrinsic argument counts differ");
+ SmallVector<Type*, 4> Tys;
+ SmallVector<Value *, 4> VecArgs;
for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
if ((IID == Intrinsic::powi || IID == Intrinsic::ctlz ||
- IID == Intrinsic::cttz) && i == 1)
+ IID == Intrinsic::cttz) && i == 1) {
Tys.push_back(CI->getArgOperand(i)->getType());
- else
+ VecArgs.push_back(CI->getArgOperand(i));
+ } else {
Tys.push_back(getVecTypeForPair(CI->getArgOperand(i)->getType(),
CJ->getArgOperand(i)->getType()));
+ // Add both operands, and then count their scalarization overhead
+ // with VF 1.
+ VecArgs.push_back(CI->getArgOperand(i));
+ VecArgs.push_back(CJ->getArgOperand(i));
+ }
}
+ // Compute the scalarization cost here with the original operands (to
+ // check for uniqueness etc.), and then call getIntrinsicInstrCost()
+ // with the constructed vector types.
+ Type *RetTy = getVecTypeForPair(IT1, JT1);
+ unsigned ScalarizationCost = 0;
+ if (!RetTy->isVoidTy())
+ ScalarizationCost += TTI->getScalarizationOverhead(RetTy, true, false);
+ ScalarizationCost += TTI->getOperandsScalarizationOverhead(VecArgs, 1);
+
FastMathFlags FMFV = FMFCI;
FMFV &= FMFCJ;
- Type *RetTy = getVecTypeForPair(IT1, JT1);
- unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys, FMFV);
+ unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys, FMFV,
+ ScalarizationCost);
if (VCost > ICost + JCost)
return false;
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
assert(ID && "Expected intrinsic call!");
- Type *RetTy = ToVectorTy(CI->getType(), VF);
- SmallVector<Type *, 4> Tys;
- for (Value *ArgOperand : CI->arg_operands())
- Tys.push_back(ToVectorTy(ArgOperand->getType(), VF));
-
FastMathFlags FMF;
if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
FMF = FPMO->getFastMathFlags();
- return TTI.getIntrinsicInstrCost(ID, RetTy, Tys, FMF);
+ SmallVector<Value *, 4> Operands(CI->arg_operands());
+ return TTI.getIntrinsicInstrCost(ID, CI->getType(), Operands, FMF, VF);
}
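The practical effect (an illustrative note with an assumed snippet): because
getOperandsScalarizationOverhead skips constant and duplicate operands, a
call whose second operand is a constant is now costed without a per-lane
extract for that operand:

  %r = call float @llvm.powi.f32(float %x, i32 3)
  ; at VF > 1, only %x adds operand scalarization overhead

Passing the operands instead of precomputed vector types makes this
information visible to the cost computation.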
static Type *smallestIntegerVectorType(Type *T1, Type *T2) {
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
// Calculate the cost of the scalar and vector calls.
- SmallVector<Type*, 4> ScalarTys, VecTys;
- for (unsigned op = 0, opc = CI->getNumArgOperands(); op!= opc; ++op) {
+ SmallVector<Type*, 4> ScalarTys;
+ for (unsigned op = 0, opc = CI->getNumArgOperands(); op != opc; ++op)
ScalarTys.push_back(CI->getArgOperand(op)->getType());
- VecTys.push_back(VectorType::get(CI->getArgOperand(op)->getType(),
- VecTy->getNumElements()));
- }
FastMathFlags FMF;
if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
int ScalarCallCost = VecTy->getNumElements() *
TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF);
- int VecCallCost = TTI->getIntrinsicInstrCost(ID, VecTy, VecTys, FMF);
+ SmallVector<Value *, 4> Args(CI->arg_operands());
+ int VecCallCost = TTI->getIntrinsicInstrCost(ID, CI->getType(), Args, FMF,
+ VecTy->getNumElements());
DEBUG(dbgs() << "SLP: Call cost "<< VecCallCost - ScalarCallCost
<< " (" << VecCallCost << "-" << ScalarCallCost << ")"
; AVX2: cost of 1 {{.*}} %F32 = call float @llvm.fma.f32
; AVX512: cost of 1 {{.*}} %F32 = call float @llvm.fma.f32
%F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
- ; SSE2: cost of 52 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
- ; SSE42: cost of 52 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
+ ; SSE2: cost of 43 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
+ ; SSE42: cost of 43 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
; AVX: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
; AVX2: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
; AVX512: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
%V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
- ; SSE2: cost of 104 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
- ; SSE42: cost of 104 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
+ ; SSE2: cost of 86 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
+ ; SSE42: cost of 86 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
; AVX: cost of 1 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
; AVX2: cost of 1 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
; AVX512: cost of 1 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
%V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
- ; SSE2: cost of 208 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
- ; SSE42: cost of 208 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
+ ; SSE2: cost of 172 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
+ ; SSE42: cost of 172 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
; AVX: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
; AVX2: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
; AVX512: cost of 1 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
; AVX2: cost of 1 {{.*}} %F64 = call double @llvm.fma.f64
; AVX512: cost of 1 {{.*}} %F64 = call double @llvm.fma.f64
%F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
- ; SSE2: cost of 24 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
- ; SSE42: cost of 24 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
+ ; SSE2: cost of 21 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
+ ; SSE42: cost of 21 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
; AVX: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
; AVX2: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
; AVX512: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
%V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
- ; SSE2: cost of 48 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
- ; SSE42: cost of 48 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
+ ; SSE2: cost of 42 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
+ ; SSE42: cost of 42 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
; AVX: cost of 1 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
; AVX2: cost of 1 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
; AVX512: cost of 1 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
%V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
- ; SSE2: cost of 96 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
- ; SSE42: cost of 96 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
+ ; SSE2: cost of 84 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
+ ; SSE42: cost of 84 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
; AVX: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
; AVX2: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
; AVX512: cost of 1 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
; VF_2-LABEL: Checking a loop in "i64_factor_8"
; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %tmp2 = load i64, i64* %tmp0, align 8
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, i64* %tmp1, align 8
-; VF_2-NEXT: Found an estimated cost of 10 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8
-; VF_2-NEXT: Found an estimated cost of 10 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8
+; VF_2-NEXT: Found an estimated cost of 7 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8
+; VF_2-NEXT: Found an estimated cost of 7 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %i64.8, %i64.8* %data, i64 %i, i32 2
; VF_4: Found an estimated cost of 40 for VF 4 For instruction: %tmp2 = load half, half* %tmp0, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load half, half* %tmp1, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp0, align 2
-; VF_4-NEXT: Found an estimated cost of 40 for VF 4 For instruction: store half 0xH0000, half* %tmp1, align 2
+; VF_4-NEXT: Found an estimated cost of 32 for VF 4 For instruction: store half 0xH0000, half* %tmp1, align 2
; VF_8-LABEL: Checking a loop in "half_factor_2"
; VF_8: Found an estimated cost of 80 for VF 8 For instruction: %tmp2 = load half, half* %tmp0, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load half, half* %tmp1, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp0, align 2
-; VF_8-NEXT: Found an estimated cost of 80 for VF 8 For instruction: store half 0xH0000, half* %tmp1, align 2
+; VF_8-NEXT: Found an estimated cost of 64 for VF 8 For instruction: store half 0xH0000, half* %tmp1, align 2
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %half.2, %half.2* %data, i64 %i, i32 0