/// Tunable parameters that control the analysis.
const InlineParams &Params;
+ /// Upper bound for the inlining cost. Bonuses are being applied to account
+ /// for speculative "expected profit" of the inlining decision.
int Threshold;
- int Cost;
+
+ /// Inlining cost measured in abstract units, accounts for all the
+ /// instructions expected to be executed for a given function invocation.
+ /// Instructions that are statically proven to be dead based on call-site
+ /// arguments are not counted here.
+ int Cost = 0;
+
+ /// When true, the analysis keeps accumulating Cost even after it crosses
+ /// Threshold (the "Cost >= Threshold && !ComputeFullInlineCost" early exits
+ /// are skipped), so the complete cost is available, e.g. for remarks (ORE).
bool ComputeFullInlineCost;
- bool IsCallerRecursive;
- bool IsRecursiveCall;
- bool ExposesReturnsTwice;
- bool HasDynamicAlloca;
- bool ContainsNoDuplicateCall;
- bool HasReturn;
- bool HasIndirectBr;
- bool HasUninlineableIntrinsic;
- bool InitsVargArgs;
+ bool IsCallerRecursive = false;
+ bool IsRecursiveCall = false;
+ bool ExposesReturnsTwice = false;
+ bool HasDynamicAlloca = false;
+ bool ContainsNoDuplicateCall = false;
+ bool HasReturn = false;
+ bool HasIndirectBr = false;
+ bool HasUninlineableIntrinsic = false;
+ bool InitsVargArgs = false;
/// Number of bytes allocated statically by the callee.
- uint64_t AllocatedSize;
- unsigned NumInstructions, NumVectorInstructions;
- int VectorBonus, TenPercentVectorBonus;
- // Bonus to be applied when the callee has only one reachable basic block.
- int SingleBBBonus;
+ uint64_t AllocatedSize = 0;
+ unsigned NumInstructions = 0;
+ unsigned NumVectorInstructions = 0;
+
+ /// Bonus to be applied when percentage of vector instructions in callee is
+ /// high (see more details in updateThreshold).
+ int VectorBonus = 0;
+ /// Bonus to be applied when the callee has only one reachable basic block.
+ int SingleBBBonus = 0;
/// While we walk the potentially-inlined instructions, we build up and
/// maintain a mapping of simplified values specific to this callsite. The
/// loads.
+ /// Whether speculative load-elimination savings are still considered valid;
+ /// disableLoadElimination() clears this and repays the accumulated
+ /// LoadEliminationCost back into Cost.
bool EnableLoadElimination;
+ /// Addresses of loads seen so far — presumably used to detect repeated
+ /// loads from the same address; TODO(review) confirm against visitLoad.
SmallPtrSet<Value *, 16> LoadAddrSet;
- int LoadEliminationCost;
+ /// Cost credited for loads assumed eliminable; added back to Cost when load
+ /// elimination is disabled (see disableLoadElimination).
+ int LoadEliminationCost = 0;
// Custom simplification helper routines.
bool isAllocaDerivedArg(Value *V);
InlineResult analyzeBlock(BasicBlock *BB,
SmallPtrSetImpl<const Value *> &EphValues);
+ /// Add Inc to Cost, saturating the sum at UpperBound (INT_MAX by default)
+ /// so the accumulated 'int' Cost cannot grow past the bound. Only the upper
+ /// end is clamped: negative increments (e.g. the call-site setup bonus) may
+ /// legitimately drive Cost below zero.
+ /// NOTE(review): there is no lower clamp, so a very large negative Inc would
+ /// narrow out of 'int' range — visible callers only pass modest bonuses, so
+ /// this appears safe in practice; confirm if new callers are added.
+ void addCost(int64_t Inc, int64_t UpperBound = INT_MAX) {
+ assert(UpperBound > 0 && UpperBound <= INT_MAX && "invalid upper bound");
+ Cost = (int)std::min(UpperBound, Cost + Inc);
+ }
+
// Disable several entry points to the visitor so we don't accidentally use
// them by declaring but not defining them here.
void visit(Module *);
: TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
CandidateCall(Call), Params(Params), Threshold(Params.DefaultThreshold),
- Cost(0), ComputeFullInlineCost(OptComputeFullInlineCost ||
- Params.ComputeFullInlineCost || ORE),
- IsCallerRecursive(false), IsRecursiveCall(false),
- ExposesReturnsTwice(false), HasDynamicAlloca(false),
- ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
- HasUninlineableIntrinsic(false), InitsVargArgs(false), AllocatedSize(0),
- NumInstructions(0), NumVectorInstructions(0), VectorBonus(0),
- SingleBBBonus(0), EnableLoadElimination(true), LoadEliminationCost(0),
- NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
- NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
- NumInstructionsSimplified(0), SROACostSavings(0),
- SROACostSavingsLost(0) {}
+ ComputeFullInlineCost(OptComputeFullInlineCost ||
+ Params.ComputeFullInlineCost || ORE),
+ EnableLoadElimination(true) {}
InlineResult analyzeCall(CallBase &Call);
// Keep a bunch of stats about the cost savings found so we can print them
// out when debugging.
- unsigned NumConstantArgs;
- unsigned NumConstantOffsetPtrArgs;
- unsigned NumAllocaArgs;
- unsigned NumConstantPtrCmps;
- unsigned NumConstantPtrDiffs;
- unsigned NumInstructionsSimplified;
+ unsigned NumConstantArgs = 0;
+ unsigned NumConstantOffsetPtrArgs = 0;
+ unsigned NumAllocaArgs = 0;
+ unsigned NumConstantPtrCmps = 0;
+ unsigned NumConstantPtrDiffs = 0;
+ unsigned NumInstructionsSimplified = 0;
+ /// Running total of cost currently being saved by SROA; reduced again when
+ /// SROA is disabled for an argument (see disableSROA).
+ unsigned SROACostSavings = 0;
+ /// Savings that were forfeited because SROA had to be disabled; grows by
+ /// exactly what SROACostSavings loses.
+ unsigned SROACostSavingsLost = 0;
void dump();
};
void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
// If we're no longer able to perform SROA we need to undo its cost savings
// and prevent subsequent analysis.
- Cost += CostIt->second;
+ addCost(CostIt->second);
SROACostSavings -= CostIt->second;
SROACostSavingsLost += CostIt->second;
SROAArgCosts.erase(CostIt);
void CallAnalyzer::disableLoadElimination() {
if (EnableLoadElimination) {
- Cost += LoadEliminationCost;
+ addCost(LoadEliminationCost);
LoadEliminationCost = 0;
EnableLoadElimination = false;
}
case Instruction::FPToUI:
case Instruction::FPToSI:
if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
- Cost += InlineConstants::CallPenalty;
+ addCost(InlineConstants::CallPenalty);
break;
default:
break;
// as such.
if (I.getType()->isFloatingPointTy() &&
TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
- Cost += InlineConstants::CallPenalty;
+ addCost(InlineConstants::CallPenalty);
return false;
}
case Intrinsic::load_relative:
// This is normally lowered to 4 LLVM instructions.
- Cost += 3 * InlineConstants::InstrCost;
+ addCost(3 * InlineConstants::InstrCost);
return false;
case Intrinsic::memset:
if (TTI.isLoweredToCall(F)) {
// We account for the average 1 instruction per call argument setup
// here.
- Cost += Call.arg_size() * InlineConstants::InstrCost;
+ addCost(Call.arg_size() * InlineConstants::InstrCost);
// Everything other than inline ASM will also have a significant cost
// merely from making the call.
if (!isa<InlineAsm>(Call.getCalledValue()))
- Cost += InlineConstants::CallPenalty;
+ addCost(InlineConstants::CallPenalty);
}
if (!Call.onlyReadsMemory())
// First, pay the price of the argument setup. We account for the average
// 1 instruction per call argument setup here.
- Cost += Call.arg_size() * InlineConstants::InstrCost;
+ addCost(Call.arg_size() * InlineConstants::InstrCost);
// Next, check if this happens to be an indirect function call to a known
// function in this inline context. If not, we've done all we can.
(int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
if (CostLowerBound > Threshold && !ComputeFullInlineCost) {
- Cost = CostLowerBound;
+ addCost((int64_t)SI.getNumCases() * InlineConstants::InstrCost);
return false;
}
int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
4 * InlineConstants::InstrCost;
- Cost = std::min((int64_t)CostUpperBound, JTCost + Cost);
+ addCost(JTCost, (int64_t)CostUpperBound);
return false;
}
// n + n / 2 - 1 = n * 3 / 2 - 1
if (NumCaseCluster <= 3) {
// Suppose a comparison includes one compare and one conditional branch.
- Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
+ addCost(NumCaseCluster * 2 * InlineConstants::InstrCost);
return false;
}
int64_t SwitchCost =
ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
- Cost = std::min((int64_t)CostUpperBound, SwitchCost + Cost);
+ addCost(SwitchCost, (int64_t)CostUpperBound);
return false;
}
if (Base::visit(&*I))
++NumInstructionsSimplified;
else
- Cost += InlineConstants::InstrCost;
+ addCost(InlineConstants::InstrCost);
using namespace ore;
// If the visit this instruction detected an uninlinable pattern, abort.
return IR;
}
- // Check if we've past the maximum possible threshold so we don't spin in
+ // Check if we've passed the maximum possible threshold so we don't spin in
// huge basic blocks that will never inline.
if (Cost >= Threshold && !ComputeFullInlineCost)
return false;
// Give out bonuses for the callsite, as the instructions setting them up
// will be gone after inlining.
- Cost -= getCallsiteCost(Call, DL);
+ addCost(-getCallsiteCost(Call, DL));
// If this function uses the coldcc calling convention, prefer not to inline
// it.
continue;
NumLoops++;
}
- Cost += NumLoops * InlineConstants::CallPenalty;
+ addCost(NumLoops * InlineConstants::CallPenalty);
}
// We applied the maximum possible vector bonus at the beginning. Now,