/// containing this constant value for the target.
bool shouldBuildLookupTablesForConstant(Constant *C) const;
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+ /// Estimate the overhead of scalarizing the operands \p Args of an
+ /// instruction, with \p VF giving the number of vector elements to assume
+ /// for each operand.
+ unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+ unsigned VF) const;
+
/// \brief Don't restrict interleaved unrolling to small loops.
bool enableAggressiveInterleaving(bool LoopHasReductions) const;
virtual unsigned getJumpBufSize() = 0;
virtual bool shouldBuildLookupTables() = 0;
virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
+ /// Scalarization-overhead query; pure virtual, so every concrete
+ /// implementation has to provide it.
+ virtual unsigned
+ getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
+ /// Operand scalarization-overhead query; pure virtual, so every concrete
+ /// implementation has to provide it.
+ virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+ unsigned VF) = 0;
virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
virtual bool enableInterleavedAccessVectorization() = 0;
virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
bool shouldBuildLookupTablesForConstant(Constant *C) override {
return Impl.shouldBuildLookupTablesForConstant(C);
}
+ /// Forward the scalarization-overhead query to the wrapped implementation
+ /// object. Marked `override` so the compiler verifies that the signature
+ /// matches the virtual function it implements.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert,
+                                   bool Extract) override {
+   return Impl.getScalarizationOverhead(Ty, Insert, Extract);
+ }
+ /// Forward the operand scalarization-overhead query to the wrapped
+ /// implementation object. Marked `override` so the compiler verifies that
+ /// the signature matches the virtual function it implements.
+ unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+                                           unsigned VF) override {
+   return Impl.getOperandsScalarizationOverhead(Args, VF);
+ }
+
bool enableAggressiveInterleaving(bool LoopHasReductions) override {
return Impl.enableAggressiveInterleaving(LoopHasReductions);
}
bool shouldBuildLookupTables() { return true; }
bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }
+ /// Default answer: no overhead is attributed to inserting into or
+ /// extracting from \p Ty.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { return 0; }
+
+ /// Default answer: no overhead is attributed to scalarizing the operands
+ /// in \p Args.
+ unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+                                           unsigned VF) {
+   return 0;
+ }
+
bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }
bool enableInterleavedAccessVectorization() { return false; }
typedef TargetTransformInfoImplCRTPBase<T> BaseT;
typedef TargetTransformInfo TTI;
- /// Estimate the overhead of scalarizing an instruction. Insert and Extract
- /// are set if the result needs to be inserted and/or extracted from vectors.
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
- assert(Ty->isVectorTy() && "Can only scalarize vectors");
- unsigned Cost = 0;
-
- for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
- if (Insert)
- Cost += static_cast<T *>(this)
- ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
- if (Extract)
- Cost += static_cast<T *>(this)
- ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
- }
-
- return Cost;
- }
-
/// Estimate a cost of shuffle as a sequence of extract and insert
/// operations.
unsigned getPermuteShuffleOverhead(Type *Ty) {
unsigned getRegisterBitWidth(bool Vector) { return 32; }
+ /// Compute what it costs to break the vector type \p Ty into scalars.
+ ///
+ /// For every element index the derived class is asked for the cost of an
+ /// insertelement (when \p Insert is set) and of an extractelement (when
+ /// \p Extract is set); the sum of those answers is returned.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
+   assert(Ty->isVectorTy() && "Can only scalarize vectors");
+   // Dispatch through the most-derived type so that its per-element costs
+   // are the ones being accumulated.
+   T *Derived = static_cast<T *>(this);
+   unsigned Overhead = 0;
+
+   const int NumElts = Ty->getVectorNumElements();
+   for (int Lane = 0; Lane < NumElts; ++Lane) {
+     if (Insert)
+       Overhead +=
+           Derived->getVectorInstrCost(Instruction::InsertElement, Ty, Lane);
+     if (Extract)
+       Overhead +=
+           Derived->getVectorInstrCost(Instruction::ExtractElement, Ty, Lane);
+   }
+
+   return Overhead;
+ }
+
+ /// Estimate the overhead of scalarizing an instruction's unique operands.
+ ///
+ /// Every distinct value in \p Args is charged once for extracting all of its
+ /// elements (Insert = false, Extract = true); repeated operands are not
+ /// counted again. An operand whose type is already a vector is costed with
+ /// that type as it is; any other operand is costed as a vector of \p VF
+ /// elements of its own type.
+ unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+                                           unsigned VF) {
+   unsigned Cost = 0;
+   SmallPtrSet<const Value*, 4> UniqueOperands;
+   for (const Value *A : Args) {
+     // insert().second is false when A has been seen before.
+     if (!UniqueOperands.insert(A).second)
+       continue;
+
+     // A vector type is not a valid element type for VectorType::get(), so
+     // an operand that is already a vector must not be widened again.
+     Type *VecTy = A->getType();
+     if (!VecTy->isVectorTy())
+       VecTy = VectorType::get(VecTy, VF);
+
+     Cost += getScalarizationOverhead(VecTy, false, true);
+   }
+   return Cost;
+ }
+
unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
unsigned getArithmeticInstrCost(
unsigned Num = Ty->getVectorNumElements();
unsigned Cost = static_cast<T *>(this)
->getArithmeticInstrCost(Opcode, Ty->getScalarType());
- // return the cost of multiple scalar invocation plus the cost of
- // inserting
- // and extracting the values.
- return getScalarizationOverhead(Ty, true, true) + Num * Cost;
+ // Return the cost of multiple scalar invocation plus the cost of
+ // inserting and extracting the values.
+ unsigned TotCost = getScalarizationOverhead(Ty, true, false) + Num * Cost;
+ if (!Args.empty())
+ TotCost += getOperandsScalarizationOverhead(Args, Num);
+ else
+ // When no information on arguments is provided, we add the cost
+ // associated with one argument as a heuristic.
+ TotCost += getScalarizationOverhead(Ty, false, true);
+
+ return TotCost;
}
// We don't know anything about this scalar instruction.
return TTIImpl->shouldBuildLookupTablesForConstant(C);
}
+/// Hand the scalarization-overhead query for \p Ty to the underlying
+/// implementation object and return its answer unchanged.
+unsigned
+TargetTransformInfo::getScalarizationOverhead(Type *Ty, bool Insert,
+                                              bool Extract) const {
+  const unsigned Overhead =
+      TTIImpl->getScalarizationOverhead(Ty, Insert, Extract);
+  return Overhead;
+}
+
+/// Hand the operand scalarization-overhead query to the underlying
+/// implementation object and return its answer unchanged.
+unsigned TargetTransformInfo::getOperandsScalarizationOverhead(
+    ArrayRef<const Value *> Args, unsigned VF) const {
+  const unsigned Overhead =
+      TTIImpl->getOperandsScalarizationOverhead(Args, VF);
+  return Overhead;
+}
+
bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
}
const AArch64Subtarget *ST;
const AArch64TargetLowering *TLI;
- /// Estimate the overhead of scalarizing an instruction. Insert and Extract
- /// are set if the result needs to be inserted and/or extracted from vectors.
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
-
const AArch64Subtarget *getST() const { return ST; }
const AArch64TargetLowering *getTLI() const { return TLI; }
const ARMSubtarget *ST;
const ARMTargetLowering *TLI;
- /// Estimate the overhead of scalarizing an instruction. Insert and Extract
- /// are set if the result needs to be inserted and/or extracted from vectors.
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
-
const ARMSubtarget *getST() const { return ST; }
const ARMTargetLowering *getTLI() const { return TLI; }
return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost;
}
-int X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
- assert (Ty->isVectorTy() && "Can only scalarize vectors");
- int Cost = 0;
-
- for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
- if (Insert)
- Cost += getVectorInstrCost(Instruction::InsertElement, Ty, i);
- if (Extract)
- Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, i);
- }
-
- return Cost;
-}
-
int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace) {
// Handle non-power-of-two vectors such as <3 x float>
const X86Subtarget *ST;
const X86TargetLowering *TLI;
- int getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
-
const X86Subtarget *getST() const { return ST; }
const X86TargetLowering *getTLI() const { return TLI; }
return V;
}
-/// \brief Estimate the overhead of scalarizing a value based on its type.
-/// Insert and Extract are set if the result needs to be inserted and/or
-/// extracted from vectors.
-static unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract,
- const TargetTransformInfo &TTI) {
- if (Ty->isVoidTy())
- return 0;
-
- assert(Ty->isVectorTy() && "Can only scalarize vectors");
- unsigned Cost = 0;
-
- for (unsigned I = 0, E = Ty->getVectorNumElements(); I < E; ++I) {
- if (Extract)
- Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, Ty, I);
- if (Insert)
- Cost += TTI.getVectorInstrCost(Instruction::InsertElement, Ty, I);
- }
-
- return Cost;
-}
-
/// \brief Estimate the overhead of scalarizing an Instruction based on the
/// types of its operands and return value.
static unsigned getScalarizationOverhead(SmallVectorImpl<Type *> &OpTys,
Type *RetTy,
const TargetTransformInfo &TTI) {
- unsigned ScalarizationCost =
- getScalarizationOverhead(RetTy, true, false, TTI);
+ unsigned ScalarizationCost = 0;
+
+ if (!RetTy->isVoidTy())
+ ScalarizationCost += TTI.getScalarizationOverhead(RetTy, true, false);
for (Type *Ty : OpTys)
- ScalarizationCost += getScalarizationOverhead(Ty, false, true, TTI);
+ ScalarizationCost += TTI.getScalarizationOverhead(Ty, false, true);
return ScalarizationCost;
}
if (VF == 1)
return 0;
+ unsigned Cost = 0;
Type *RetTy = ToVectorTy(I->getType(), VF);
+ if (!RetTy->isVoidTy())
+ Cost += TTI.getScalarizationOverhead(RetTy, true, false);
- SmallVector<Type *, 4> OpTys;
- unsigned OperandsNum = I->getNumOperands();
- for (unsigned OpInd = 0; OpInd < OperandsNum; ++OpInd)
- OpTys.push_back(ToVectorTy(I->getOperand(OpInd)->getType(), VF));
+ SmallVector<const Value *, 4> Operands(I->operand_values());
+ Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
- return getScalarizationOverhead(OpTys, RetTy, TTI);
+ return Cost;
}
// Estimate cost of a call instruction CI if it were vectorized with factor VF.
// Compute the scalarization overhead of needed insertelement instructions
// and phi nodes.
if (Legal->isScalarWithPredication(I) && !I->getType()->isVoidTy()) {
- ScalarCost += getScalarizationOverhead(ToVectorTy(I->getType(), VF), true,
- false, TTI);
+ ScalarCost += TTI.getScalarizationOverhead(ToVectorTy(I->getType(), VF),
+ true, false);
ScalarCost += VF * TTI.getCFInstrCost(Instruction::PHI);
}
if (canBeScalarized(J))
Worklist.push_back(J);
else if (needsExtract(J))
- ScalarCost += getScalarizationOverhead(ToVectorTy(J->getType(), VF),
- false, true, TTI);
+ ScalarCost += TTI.getScalarizationOverhead(
+ ToVectorTy(J->getType(),VF), false, true);
}
// Scale the total scalar cost by block probability.