From 7cb6abb7cbf8a383b193004f6c49d6418b95e22b Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Thu, 26 Jan 2017 07:03:25 +0000 Subject: [PATCH] [TargetTransformInfo] Refactor and improve getScalarizationOverhead() Refactoring to remove duplications of this method. New method getOperandsScalarizationOverhead() that looks at the present unique operands and add extract costs for them. Old behaviour was to just add extract costs for one operand of the type always, which still happens in getArithmeticInstrCost() if no operands are provided by the caller. This is a good start of improving on this, but there are more places that can be improved by using getOperandsScalarizationOverhead(). Review: Hal Finkel https://reviews.llvm.org/D29017 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@293155 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Analysis/TargetTransformInfo.h | 17 +++++ .../llvm/Analysis/TargetTransformInfoImpl.h | 7 ++ include/llvm/CodeGen/BasicTTIImpl.h | 64 ++++++++++++------- lib/Analysis/TargetTransformInfo.cpp | 11 ++++ .../AArch64/AArch64TargetTransformInfo.h | 4 -- lib/Target/ARM/ARMTargetTransformInfo.h | 4 -- lib/Target/X86/X86TargetTransformInfo.cpp | 14 ---- lib/Target/X86/X86TargetTransformInfo.h | 2 - lib/Transforms/Vectorize/LoopVectorize.cpp | 48 +++++--------- 9 files changed, 92 insertions(+), 79 deletions(-) diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 209f05c279d..89f0b22a3bf 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -411,6 +411,11 @@ public: /// containing this constant value for the target. bool shouldBuildLookupTablesForConstant(Constant *C) const; + unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + + unsigned getOperandsScalarizationOverhead(ArrayRef Args, + unsigned VF) const; + /// \brief Don't restrict interleaved unrolling to small loops. bool enableAggressiveInterleaving(bool LoopHasReductions) const; @@ -743,6 +748,10 @@ public: virtual unsigned getJumpBufSize() = 0; virtual bool shouldBuildLookupTables() = 0; virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0; + virtual unsigned + getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0; + virtual unsigned getOperandsScalarizationOverhead(ArrayRef Args, + unsigned VF) = 0; virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0; virtual bool enableInterleavedAccessVectorization() = 0; virtual bool isFPVectorizationPotentiallyUnsafe() = 0; @@ -933,6 +942,14 @@ public: bool shouldBuildLookupTablesForConstant(Constant *C) override { return Impl.shouldBuildLookupTablesForConstant(C); } + unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { + return Impl.getScalarizationOverhead(Ty, Insert, Extract); + } + unsigned getOperandsScalarizationOverhead(ArrayRef Args, + unsigned VF) { + return Impl.getOperandsScalarizationOverhead(Args, VF); + } + bool enableAggressiveInterleaving(bool LoopHasReductions) override { return Impl.enableAggressiveInterleaving(LoopHasReductions); } diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h index cafc40723c9..42cff56cd39 100644 --- a/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -251,6 +251,13 @@ public: bool shouldBuildLookupTables() { return true; } bool shouldBuildLookupTablesForConstant(Constant *C) { return true; } + unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { + return 0; + } + + unsigned getOperandsScalarizationOverhead(ArrayRef Args, + unsigned VF) { return 0; } + bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; } bool enableInterleavedAccessVectorization() { return false; } diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h index 7efdbcccdef..f4cd21a88f1 100644 --- a/include/llvm/CodeGen/BasicTTIImpl.h +++ b/include/llvm/CodeGen/BasicTTIImpl.h @@ -42,24 +42,6 @@ private: typedef TargetTransformInfoImplCRTPBase BaseT; typedef TargetTransformInfo TTI; - /// Estimate the overhead of scalarizing an instruction. Insert and Extract - /// are set if the result needs to be inserted and/or extracted from vectors. - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { - assert(Ty->isVectorTy() && "Can only scalarize vectors"); - unsigned Cost = 0; - - for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { - if (Insert) - Cost += static_cast(this) - ->getVectorInstrCost(Instruction::InsertElement, Ty, i); - if (Extract) - Cost += static_cast(this) - ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); - } - - return Cost; - } - /// Estimate a cost of shuffle as a sequence of extract and insert /// operations. unsigned getPermuteShuffleOverhead(Type *Ty) { @@ -301,6 +283,37 @@ public: unsigned getRegisterBitWidth(bool Vector) { return 32; } + /// Estimate the overhead of scalarizing an instruction. Insert and Extract + /// are set if the result needs to be inserted and/or extracted from vectors. + unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { + assert(Ty->isVectorTy() && "Can only scalarize vectors"); + unsigned Cost = 0; + + for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { + if (Insert) + Cost += static_cast(this) + ->getVectorInstrCost(Instruction::InsertElement, Ty, i); + if (Extract) + Cost += static_cast(this) + ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); + } + + return Cost; + } + + /// Estimate the overhead of scalarizing an instructions unique operands. + unsigned getOperandsScalarizationOverhead(ArrayRef Args, + unsigned VF) { + unsigned Cost = 0; + SmallPtrSet UniqueOperands; + for (const Value *A : Args) { + if (UniqueOperands.insert(A).second) + Cost += getScalarizationOverhead(VectorType::get(A->getType(), VF), + false, true); + } + return Cost; + } + unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } unsigned getArithmeticInstrCost( @@ -341,10 +354,17 @@ public: unsigned Num = Ty->getVectorNumElements(); unsigned Cost = static_cast(this) ->getArithmeticInstrCost(Opcode, Ty->getScalarType()); - // return the cost of multiple scalar invocation plus the cost of - // inserting - // and extracting the values. - return getScalarizationOverhead(Ty, true, true) + Num * Cost; + // Return the cost of multiple scalar invocation plus the cost of + // inserting and extracting the values. + unsigned TotCost = getScalarizationOverhead(Ty, true, false) + Num * Cost; + if (!Args.empty()) + TotCost += getOperandsScalarizationOverhead(Args, Num); + else + // When no information on arguments is provided, we add the cost + // associated with one argument as a heuristic. + TotCost += getScalarizationOverhead(Ty, false, true); + + return TotCost; } // We don't know anything about this scalar instruction. diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 5c0d1aac1b9..2727c0c4795 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -182,6 +182,17 @@ bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const return TTIImpl->shouldBuildLookupTablesForConstant(C); } +unsigned TargetTransformInfo:: +getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const { + return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract); +} + +unsigned TargetTransformInfo:: +getOperandsScalarizationOverhead(ArrayRef Args, + unsigned VF) const { + return TTIImpl->getOperandsScalarizationOverhead(Args, VF); +} + bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const { return TTIImpl->enableAggressiveInterleaving(LoopHasReductions); } diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h index 18287ed6653..a7f7fb9457a 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -34,10 +34,6 @@ class AArch64TTIImpl : public BasicTTIImplBase { const AArch64Subtarget *ST; const AArch64TargetLowering *TLI; - /// Estimate the overhead of scalarizing an instruction. Insert and Extract - /// are set if the result needs to be inserted and/or extracted from vectors. - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract); - const AArch64Subtarget *getST() const { return ST; } const AArch64TargetLowering *getTLI() const { return TLI; } diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h index 3c83cd92a61..5ee1f7c4b0e 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/lib/Target/ARM/ARMTargetTransformInfo.h @@ -33,10 +33,6 @@ class ARMTTIImpl : public BasicTTIImplBase { const ARMSubtarget *ST; const ARMTargetLowering *TLI; - /// Estimate the overhead of scalarizing an instruction. Insert and Extract - /// are set if the result needs to be inserted and/or extracted from vectors. - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract); - const ARMSubtarget *getST() const { return ST; } const ARMTargetLowering *getTLI() const { return TLI; } diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index bd7fa9ca617..3ebfdd0ede7 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1577,20 +1577,6 @@ int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost; } -int X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { - assert (Ty->isVectorTy() && "Can only scalarize vectors"); - int Cost = 0; - - for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { - if (Insert) - Cost += getVectorInstrCost(Instruction::InsertElement, Ty, i); - if (Extract) - Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, i); - } - - return Cost; -} - int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) { // Handle non-power-of-two vectors such as <3 x float> diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h index ecaaf951cff..63a1493002f 100644 --- a/lib/Target/X86/X86TargetTransformInfo.h +++ b/lib/Target/X86/X86TargetTransformInfo.h @@ -33,8 +33,6 @@ class X86TTIImpl : public BasicTTIImplBase { const X86Subtarget *ST; const X86TargetLowering *TLI; - int getScalarizationOverhead(Type *Ty, bool Insert, bool Extract); - const X86Subtarget *getST() const { return ST; } const X86TargetLowering *getTLI() const { return TLI; } diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 6ffe29a5c98..ce3f3b0afef 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3598,37 +3598,18 @@ static Value *addFastMathFlag(Value *V) { return V; } -/// \brief Estimate the overhead of scalarizing a value based on its type. -/// Insert and Extract are set if the result needs to be inserted and/or -/// extracted from vectors. -static unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract, - const TargetTransformInfo &TTI) { - if (Ty->isVoidTy()) - return 0; - - assert(Ty->isVectorTy() && "Can only scalarize vectors"); - unsigned Cost = 0; - - for (unsigned I = 0, E = Ty->getVectorNumElements(); I < E; ++I) { - if (Extract) - Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, Ty, I); - if (Insert) - Cost += TTI.getVectorInstrCost(Instruction::InsertElement, Ty, I); - } - - return Cost; -} - /// \brief Estimate the overhead of scalarizing an Instruction based on the /// types of its operands and return value. static unsigned getScalarizationOverhead(SmallVectorImpl &OpTys, Type *RetTy, const TargetTransformInfo &TTI) { - unsigned ScalarizationCost = - getScalarizationOverhead(RetTy, true, false, TTI); + unsigned ScalarizationCost = 0; + + if (!RetTy->isVoidTy()) + ScalarizationCost += TTI.getScalarizationOverhead(RetTy, true, false); for (Type *Ty : OpTys) - ScalarizationCost += getScalarizationOverhead(Ty, false, true, TTI); + ScalarizationCost += TTI.getScalarizationOverhead(Ty, false, true); return ScalarizationCost; } @@ -3640,14 +3621,15 @@ static unsigned getScalarizationOverhead(Instruction *I, unsigned VF, if (VF == 1) return 0; + unsigned Cost = 0; Type *RetTy = ToVectorTy(I->getType(), VF); + if (!RetTy->isVoidTy()) + Cost += TTI.getScalarizationOverhead(RetTy, true, false); - SmallVector OpTys; - unsigned OperandsNum = I->getNumOperands(); - for (unsigned OpInd = 0; OpInd < OperandsNum; ++OpInd) - OpTys.push_back(ToVectorTy(I->getOperand(OpInd)->getType(), VF)); + SmallVector Operands(I->operand_values()); + Cost += TTI.getOperandsScalarizationOverhead(Operands, VF); - return getScalarizationOverhead(OpTys, RetTy, TTI); + return Cost; } // Estimate cost of a call instruction CI if it were vectorized with factor VF. @@ -6713,8 +6695,8 @@ int LoopVectorizationCostModel::computePredInstDiscount( // Compute the scalarization overhead of needed insertelement instructions // and phi nodes. if (Legal->isScalarWithPredication(I) && !I->getType()->isVoidTy()) { - ScalarCost += getScalarizationOverhead(ToVectorTy(I->getType(), VF), true, - false, TTI); + ScalarCost += TTI.getScalarizationOverhead(ToVectorTy(I->getType(), VF), + true, false); ScalarCost += VF * TTI.getCFInstrCost(Instruction::PHI); } @@ -6729,8 +6711,8 @@ int LoopVectorizationCostModel::computePredInstDiscount( if (canBeScalarized(J)) Worklist.push_back(J); else if (needsExtract(J)) - ScalarCost += getScalarizationOverhead(ToVectorTy(J->getType(), VF), - false, true, TTI); + ScalarCost += TTI.getScalarizationOverhead( + ToVectorTy(J->getType(),VF), false, true); } // Scale the total scalar cost by block probability. -- 2.40.0