InvalidSCCSet, nullptr, nullptr};
PreservedAnalyses PA = PreservedAnalyses::all();
- for (LazyCallGraph::RefSCC &InitialRC : CG.postorder_ref_sccs()) {
+ for (auto RCI = CG.postorder_ref_scc_begin(),
+ RCE = CG.postorder_ref_scc_end();
+ RCI != RCE;) {
assert(RCWorklist.empty() &&
"Should always start with an empty RefSCC worklist");
// The postorder_ref_sccs range we are walking is lazily constructed, so
// we only push the first one onto the worklist. The worklist allows us
// to capture *new* RefSCCs created during transformations.
//
// We really want to form RefSCCs lazily because that makes them cheaper
// to update as the program is simplified and allows us to have greater
// cache locality as forming a RefSCC touches all the parts of all the
// functions within that RefSCC.
- RCWorklist.insert(&InitialRC);
+ //
+ // We also eagerly increment the iterator to the next position because
+ // the CGSCC passes below may delete the current RefSCC.
+ RCWorklist.insert(&*RCI++);
do {
LazyCallGraph::RefSCC *RC = RCWorklist.pop_back_val();
dbgs() << "Re-running SCC passes after a refinement of the "
"current SCC: "
<< *UR.UpdatedC << "\n";
+
+ // Note that both `C` and `RC` may at this point refer to deleted,
+ // invalid SCC and RefSCCs respectively. But we will short circuit
+ // the processing when we check them in the loop above.
} while (UR.UpdatedC);
} while (!CWorklist.empty());
-//===- InlinerPass.h - Code common to all inliners --------------*- C++ -*-===//
+//===- Inliner.h - Inliner pass and infrastructure --------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file defines a simple policy-based bottom-up inliner. This file
-// implements all of the boring mechanics of the bottom-up inlining, while the
-// subclass determines WHAT to inline, which is the much more interesting
-// component.
-//
-//===----------------------------------------------------------------------===//
-#ifndef LLVM_TRANSFORMS_IPO_INLINERPASS_H
-#define LLVM_TRANSFORMS_IPO_INLINERPASS_H
+#ifndef LLVM_TRANSFORMS_IPO_INLINER_H
+#define LLVM_TRANSFORMS_IPO_INLINER_H
+#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h"
class InlineCost;
class OptimizationRemarkEmitter;
class ProfileSummaryInfo;
-template <class PtrType, unsigned SmallSize> class SmallPtrSet;
/// This class contains all of the helper code which is used to perform the
-/// inlining operations that do not depend on the policy.
-struct Inliner : public CallGraphSCCPass {
- explicit Inliner(char &ID);
- explicit Inliner(char &ID, bool InsertLifetime);
+/// inlining operations that do not depend on the policy. It contains the core
+/// bottom-up inlining infrastructure that specific inliner passes use.
+struct LegacyInlinerBase : public CallGraphSCCPass {
+ explicit LegacyInlinerBase(char &ID);
+ explicit LegacyInlinerBase(char &ID, bool InsertLifetime);
/// For this class, we declare that we require and preserve the call graph.
/// If the derived class implements this method, it should always explicitly
/// call the implementation here.
ImportedFunctionsInliningStatistics ImportedFunctionsStats;
};
+/// The inliner pass for the new pass manager.
+///
+/// This pass wires together the inlining utilities and the inline cost
+/// analysis into a CGSCC pass. It considers every call in every function in
+/// the SCC and tries to inline if profitable. It can be tuned with a number of
+/// parameters to control what cost model is used and what tradeoffs are made
+/// when making the decision.
+///
+/// It should be noted that the legacy inliners do considerably more than this
+/// inliner pass does. They provide logic for manually merging allocas, and
+/// doing considerable DCE including the DCE of dead functions. This pass makes
+/// every attempt to be simpler. DCE of functions requires complex reasoning
+/// about comdat groups, etc. Instead, it is expected that other more focused
+/// passes be composed to achieve the same end result.
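+///
+/// As a rough usage sketch (assuming the usual new pass manager setup and the
+/// `getInlineParams` helpers from InlineCost.h), the pass can be added to a
+/// module pipeline through the CGSCC adaptor:
+///
+/// \code
+///   ModulePassManager MPM;
+///   MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
+///       InlinerPass(getInlineParams(/*Threshold=*/275))));
+/// \endcode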
+class InlinerPass : public PassInfoMixin<InlinerPass> {
+public:
+ InlinerPass(InlineParams Params = getInlineParams())
+ : Params(std::move(Params)) {}
+
+ PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
+ LazyCallGraph &CG, CGSCCUpdateResult &UR);
+
+private:
+ InlineParams Params;
+};
+
} // End llvm namespace
#endif
/// inlined from the callee. This is only filled in if CG is non-null.
SmallVector<WeakVH, 8> InlinedCalls;
+ /// All of the new call sites inlined into the caller.
+ ///
+ /// 'InlineFunction' fills this in by scanning the inlined instructions, and
+ /// only if CG is null. If CG is non-null, instead the value handle
+ /// `InlinedCalls` above is used.
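+ ///
+ /// A minimal sketch of the intended use, assuming an existing `CallSite CS`
+ /// and no call graph (`visit` is a hypothetical stand-in for whatever
+ /// processing the caller wants to do with the new call sites):
+ /// \code
+ ///   InlineFunctionInfo IFI(/*cg=*/nullptr);
+ ///   if (InlineFunction(CS, IFI))
+ ///     for (CallSite &NewCS : IFI.InlinedCallSites)
+ ///       visit(NewCS);
+ /// \endcode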
+ SmallVector<CallSite, 8> InlinedCallSites;
+
void reset() {
StaticAllocas.clear();
InlinedCalls.clear();
+ InlinedCallSites.clear();
}
};
/// exists in the instruction stream. Similarly this will inline a recursive
/// function by one level.
///
+/// Note that while this routine is allowed to clean up and optimize the
+/// *inlined* code to minimize the actual inserted code, it must not delete
+/// code in the caller as users of this routine may have pointers to
+/// instructions in the caller that need to remain stable.
bool InlineFunction(CallInst *C, InlineFunctionInfo &IFI,
AAResults *CalleeAAR = nullptr, bool InsertLifetime = true);
bool InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
bool HotCallsite = false;
uint64_t TotalWeight;
- if (CS.getInstruction()->extractProfTotalWeight(TotalWeight) &&
+ if (PSI && CS.getInstruction()->extractProfTotalWeight(TotalWeight) &&
PSI->isHotCount(TotalWeight)) {
HotCallsite = true;
}
// when it would increase the threshold and the caller does not need to
// minimize its size.
bool InlineHint = Callee.hasFnAttribute(Attribute::InlineHint) ||
- PSI->isFunctionEntryHot(&Callee);
+ (PSI && PSI->isFunctionEntryHot(&Callee));
if (InlineHint && !Caller->optForMinSize())
Threshold = MaxIfValid(Threshold, Params.HintThreshold);
if (HotCallsite && !Caller->optForMinSize())
Threshold = MaxIfValid(Threshold, Params.HotCallSiteThreshold);
- bool ColdCallee = PSI->isFunctionEntryCold(&Callee);
+ bool ColdCallee = PSI && PSI->isFunctionEntryCold(&Callee);
// For cold callees, use the ColdThreshold knob if it is available and reduces
// the threshold.
if (ColdCallee)
#include "llvm/Transforms/IPO/GlobalOpt.h"
#include "llvm/Transforms/IPO/GlobalSplit.h"
#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
+#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/IPO/LowerTypeTests.h"
#include "llvm/Transforms/IPO/PartialInlining.h"
#endif
CGSCC_PASS("invalidate<all>", InvalidateAllAnalysesPass())
CGSCC_PASS("function-attrs", PostOrderFunctionAttrsPass())
+CGSCC_PASS("inline", InlinerPass())
CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass())
#undef CGSCC_PASS
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
-#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/Transforms/Utils/Cloning.h"
using namespace llvm;
///
-/// Unlike the \c AlwaysInlinerPass, this uses the more heavyweight \c Inliner
-/// base class to provide several facilities such as array alloca merging.
+/// Unlike the \c AlwaysInlinerPass, this uses the more heavyweight
+/// \c LegacyInlinerBase class to provide several facilities such as array
+/// alloca merging.
-class AlwaysInlinerLegacyPass : public Inliner {
+class AlwaysInlinerLegacyPass : public LegacyInlinerBase {
public:
- AlwaysInlinerLegacyPass() : Inliner(ID, /*InsertLifetime*/ true) {
+ AlwaysInlinerLegacyPass() : LegacyInlinerBase(ID, /*InsertLifetime*/ true) {
initializeAlwaysInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
}
- AlwaysInlinerLegacyPass(bool InsertLifetime) : Inliner(ID, InsertLifetime) {
+ AlwaysInlinerLegacyPass(bool InsertLifetime)
+ : LegacyInlinerBase(ID, InsertLifetime) {
initializeAlwaysInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
}
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/Transforms/IPO/Inliner.h"
using namespace llvm;
/// The common implementation of the inlining logic is shared between this
/// inliner pass and the always inliner pass. The two passes use different cost
/// analyses to determine when to inline.
-class SimpleInliner : public Inliner {
+class SimpleInliner : public LegacyInlinerBase {
InlineParams Params;
public:
- SimpleInliner() : Inliner(ID), Params(llvm::getInlineParams()) {
+ SimpleInliner() : LegacyInlinerBase(ID), Params(llvm::getInlineParams()) {
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
}
- explicit SimpleInliner(InlineParams Params) : Inliner(ID), Params(Params) {
+ explicit SimpleInliner(InlineParams Params)
+ : LegacyInlinerBase(ID), Params(Params) {
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
}
bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) {
TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
- return Inliner::runOnSCC(SCC);
+ return LegacyInlinerBase::runOnSCC(SCC);
}
void SimpleInliner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetTransformInfoWrapperPass>();
- Inliner::getAnalysisUsage(AU);
+ LegacyInlinerBase::getAnalysisUsage(AU);
}
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
cl::Hidden, cl::desc("Enable inliner stats for imported functions"));
} // namespace
-Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InsertLifetime(true) {}
+LegacyInlinerBase::LegacyInlinerBase(char &ID)
+ : CallGraphSCCPass(ID), InsertLifetime(true) {}
-Inliner::Inliner(char &ID, bool InsertLifetime)
+LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime)
: CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {}
/// For this class, we declare that we require and preserve the call graph.
/// If the derived class implements this method, it should
/// always explicitly call the implementation here.
-void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
+void LegacyInlinerBase::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
return false;
}
-bool Inliner::doInitialization(CallGraph &CG) {
+bool LegacyInlinerBase::doInitialization(CallGraph &CG) {
if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
ImportedFunctionsStats.setModuleInfo(CG.getModule());
return false; // No changes to CallGraph.
}
-bool Inliner::runOnSCC(CallGraphSCC &SCC) {
+bool LegacyInlinerBase::runOnSCC(CallGraphSCC &SCC) {
if (skipSCC(SCC))
return false;
return inlineCalls(SCC);
return Changed;
}
-bool Inliner::inlineCalls(CallGraphSCC &SCC) {
+bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
ACT = &getAnalysis<AssumptionCacheTracker>();
PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
/// Remove now-dead linkonce functions at the end of
/// processing to avoid breaking the SCC traversal.
-bool Inliner::doFinalization(CallGraph &CG) {
+bool LegacyInlinerBase::doFinalization(CallGraph &CG) {
if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
ImportedFunctionsStats.dump(InlinerFunctionImportStats ==
InlinerFunctionImportStatsOpts::Verbose);
}
/// Remove dead functions that are not included in DNR (Do Not Remove) list.
-bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
+bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG,
+ bool AlwaysInlineOnly) {
SmallVector<CallGraphNode *, 16> FunctionsToRemove;
SmallVector<CallGraphNode *, 16> DeadFunctionsInComdats;
SmallDenseMap<const Comdat *, int, 16> ComdatEntriesAlive;
}
return true;
}
+
+PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
+ CGSCCAnalysisManager &AM, LazyCallGraph &CG,
+ CGSCCUpdateResult &UR) {
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG)
+ .getManager();
+ const ModuleAnalysisManager &MAM =
+ AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG).getManager();
+ bool Changed = false;
+
+ assert(InitialC.size() > 0 && "Cannot handle an empty SCC!");
+ Module &M = *InitialC.begin()->getFunction().getParent();
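+ // Note that a CGSCC pass can only read *cached* module analysis results via
+ // the proxy; it cannot trigger their computation. As a result, PSI may be
+ // null here and every use of it below must tolerate that.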
+ ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M);
+
+ std::function<AssumptionCache &(Function &)> GetAssumptionCache =
+ [&](Function &F) -> AssumptionCache & {
+ return FAM.getResult<AssumptionAnalysis>(F);
+ };
+
+ // Set up the data structure used to plumb customization into the
+ // `InlineFunction` routine.
+ InlineFunctionInfo IFI(/*cg=*/nullptr);
+
+ auto GetInlineCost = [&](CallSite CS) {
+ Function &Callee = *CS.getCalledFunction();
+ auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
+ return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, PSI);
+ };
+
+ // We use a worklist of nodes to process so that we can handle the SCC
+ // structure changing as we inline, with some nodes no longer part of the
+ // current SCC. As a consequence, we also need to use an updatable pointer
+ // for the SCC.
+ SmallVector<LazyCallGraph::Node *, 16> Nodes;
+ for (auto &N : InitialC)
+ Nodes.push_back(&N);
+ auto *C = &InitialC;
+ auto *RC = &C->getOuterRefSCC();
+
+ // We also use a secondary worklist of call sites within a particular node to
+ // allow quickly continuing to inline through newly inlined call sites where
+ // possible.
+ SmallVector<CallSite, 16> Calls;
+
+ // Track a set vector of inlined callees so that we can augment the caller
+ // with all of their edges in the call graph before pruning out the ones that
+ // got simplified away.
+ SmallSetVector<Function *, 4> InlinedCallees;
+
+ // Track the dead functions to delete once finished with inlining calls. We
+ // defer deleting these to make it easier to handle the call graph updates.
+ SmallVector<Function *, 4> DeadFunctions;
+
+ do {
+ auto &N = *Nodes.pop_back_val();
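+ // Skip any node that is no longer part of the SCC we are processing; the
+ // call graph updates below can refine the SCC structure as we inline.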
+ if (CG.lookupSCC(N) != C)
+ continue;
+ Function &F = N.getFunction();
+ if (F.hasFnAttribute(Attribute::OptimizeNone))
+ continue;
+
+ // Get the remarks emission analysis for the caller.
+ auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+
+ // We want to generally process call sites top-down in order for
+ // simplifications stemming from replacing the call with the returned value
+ // after inlining to be visible to subsequent inlining decisions. So we
+ // walk the function backwards and then process the back of the vector.
+ // FIXME: Using reverse is a really bad way to do this. Instead we should
+ // do an actual PO walk of the function body.
+ for (Instruction &I : reverse(instructions(F)))
+ if (auto CS = CallSite(&I))
+ if (Function *Callee = CS.getCalledFunction())
+ if (!Callee->isDeclaration())
+ Calls.push_back(CS);
+
+ bool DidInline = false;
+ while (!Calls.empty()) {
+ CallSite CS = Calls.pop_back_val();
+ Function &Callee = *CS.getCalledFunction();
+
+ // Check whether we want to inline this callsite.
+ if (!shouldInline(CS, GetInlineCost, ORE))
+ continue;
+
+ if (!InlineFunction(CS, IFI))
+ continue;
+ DidInline = true;
+ InlinedCallees.insert(&Callee);
+
+ // Add any new callsites to defined functions to the worklist.
+ for (CallSite &CS : reverse(IFI.InlinedCallSites))
+ if (Function *NewCallee = CS.getCalledFunction())
+ if (!NewCallee->isDeclaration())
+ Calls.push_back(CS);
+
+ // For local functions, check whether this makes the callee trivially
+ // dead. In that case, we can drop the body of the function eagerly
+ // which may reduce the number of callers of other functions to one,
+ // changing inline cost thresholds.
+ if (Callee.hasLocalLinkage()) {
+ // To check this we also need to nuke any dead constant uses (perhaps
+ // made dead by this operation on other functions).
+ Callee.removeDeadConstantUsers();
+ if (Callee.use_empty()) {
+ // Clear the body and queue the function itself for deletion when we
+ // finish inlining and call graph updates.
+ // Note that after this point, it is an error to do anything other
+ // than use the callee's address or delete it.
+ Callee.dropAllReferences();
+ assert(find(DeadFunctions, &Callee) == DeadFunctions.end() &&
+        "Cannot cause a function to become dead twice!");
+ DeadFunctions.push_back(&Callee);
+ }
+ }
+ }
+
+ if (!DidInline)
+ continue;
+ Changed = true;
+
+ // Add all the inlined callees' edges to the caller. These are by
+ // definition trivial edges as we already had a transitive call edge to the
+ // callee.
+ for (Function *InlinedCallee : InlinedCallees) {
+ LazyCallGraph::Node &CalleeN = *CG.lookup(*InlinedCallee);
+ for (LazyCallGraph::Edge &E : CalleeN)
+ if (E.isCall())
+ RC->insertTrivialCallEdge(N, *E.getNode());
+ else
+ RC->insertTrivialRefEdge(N, *E.getNode());
+ }
+ InlinedCallees.clear();
+
+ // At this point, since we have made changes, we have at least removed
+ // a call instruction. However, in the process we do some incremental
+ // simplification of the surrounding code. This simplification can
+ // essentially do all of the same things as a function pass, so we can
+ // re-use the exact same logic for updating the call graph to reflect the
+ // change.
+ C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR);
+ RC = &C->getOuterRefSCC();
+ } while (!Nodes.empty());
+
+ // Now that we've finished inlining all of the calls across this SCC, delete
+ // all of the trivially dead functions, updating the call graph and the CGSCC
+ // pass manager in the process.
+ //
+ // Note that this walks the dead functions in the deterministic order in
+ // which they were queued, but the order is not observable: all we do is
+ // delete things and add pointers to unordered sets.
+ for (Function *DeadF : DeadFunctions) {
+ // Get the necessary information out of the call graph and nuke the
+ // function there.
+ auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF));
+ auto &DeadRC = DeadC.getOuterRefSCC();
+ CG.removeDeadFunction(*DeadF);
+
+ // Mark the relevant parts of the call graph as invalid so we don't visit
+ // them.
+ UR.InvalidatedSCCs.insert(&DeadC);
+ UR.InvalidatedRefSCCs.insert(&DeadRC);
+
+ // And delete the actual function from the module.
+ M.getFunctionList().erase(DeadF);
+ }
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
}
// Update the callgraph if requested.
- if (IFI.CG)
+ if (IFI.CG) {
UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
+ } else {
+ // Otherwise just collect the raw call sites that were inlined.
+ for (BasicBlock &NewBB :
+ make_range(FirstNewBlock->getIterator(), Caller->end()))
+ for (Instruction &I : NewBB)
+ if (auto CS = CallSite(&I))
+ IFI.InlinedCallSites.push_back(CS);
+ }
// For 'nodebug' functions, the associated DISubprogram is always null.
// Conservatively avoid propagating the callsite debug location to
; RUN: opt < %s -inline -sroa -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(inline,function(sroa))' -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
define i32 @test1f(i32 %i) {
--- /dev/null
+; RUN: opt < %s -aa-pipeline=basic-aa -passes='cgscc(function-attrs,inline)' -S | FileCheck %s
+; This test runs the inliner and the function attribute deduction. It ensures
+; that when the inliner mutates the call graph it correctly updates the CGSCC
+; iteration so that we can compute refined function attributes. In this way it
+; is leveraging function attribute computation to observe correct call graph
+; updates.
+
+; Boring unknown external function call.
+; CHECK: declare void @unknown()
+declare void @unknown()
+
+; Sanity check: this should get annotated as readnone.
+; CHECK: Function Attrs: readnone
+; CHECK-NEXT: declare void @readnone()
+declare void @readnone() readnone
+
+; The 'test1_' prefixed functions are designed to trigger forming a new direct
+; call in the inlined body of the function. After that, we form a new SCC
+; and can use it to deduce precise function attrs.
+
+; This function should no longer exist.
+; CHECK-NOT: @test1_f()
+define internal void @test1_f(void()* %p) {
+entry:
+ call void %p()
+ ret void
+}
+
+; This function should have had 'readnone' deduced for its SCC.
+; CHECK: Function Attrs: noinline readnone
+; CHECK-NEXT: define void @test1_g()
+define void @test1_g() noinline {
+entry:
+ call void @test1_f(void()* @test1_h)
+ ret void
+}
+
+; This function should have had 'readnone' deduced for its SCC.
+; CHECK: Function Attrs: noinline readnone
+; CHECK-NEXT: define void @test1_h()
+define void @test1_h() noinline {
+entry:
+ call void @test1_g()
+ call void @readnone()
+ ret void
+}
+
+
+; The 'test2_' prefixed functions are designed to trigger forming a new direct
+; call due to RAUW-ing the returned value of a called function into the caller.
+; This too should form a new SCC which can then be reasoned about to compute
+; precise function attrs.
+
+; This function should no longer exist.
+; CHECK-NOT: @test2_f()
+define internal void()* @test2_f() {
+entry:
+ ret void()* @test2_h
+}
+
+; This function should have had 'readnone' deduced for its SCC.
+; CHECK: Function Attrs: noinline readnone
+; CHECK-NEXT: define void @test2_g()
+define void @test2_g() noinline {
+entry:
+ %p = call void()* @test2_f()
+ call void %p()
+ ret void
+}
+
+; This function should have had 'readnone' deduced for its SCC.
+; CHECK: Function Attrs: noinline readnone
+; CHECK-NEXT: define void @test2_h()
+define void @test2_h() noinline {
+entry:
+ call void @test2_g()
+ call void @readnone()
+ ret void
+}
+
+
+; The 'test3_' prefixed functions are designed to inline in a way that causes
+; call sites to become trivially dead in the middle of inlining the callsites
+; of a single function, making sure that the inliner does not get confused by
+; this pattern.
+
+; CHECK-NOT: @test3_maybe_unknown(
+define internal void @test3_maybe_unknown(i1 %b) {
+entry:
+ br i1 %b, label %then, label %exit
+
+then:
+ call void @unknown()
+ br label %exit
+
+exit:
+ ret void
+}
+
+; CHECK-NOT: @test3_f(
+define internal i1 @test3_f() {
+entry:
+ ret i1 false
+}
+
+; CHECK-NOT: @test3_g(
+define internal i1 @test3_g(i1 %b) {
+entry:
+ br i1 %b, label %then1, label %if2
+
+then1:
+ call void @test3_maybe_unknown(i1 true)
+ br label %if2
+
+if2:
+ %f = call i1 @test3_f()
+ br i1 %f, label %then2, label %exit
+
+then2:
+ call void @test3_maybe_unknown(i1 true)
+ br label %exit
+
+exit:
+ ret i1 false
+}
+
+; FIXME: Currently the inliner doesn't successfully mark this as readnone
+; because while it simplifies trivially dead CFGs when inlining callees, it
+; doesn't simplify the caller's trivially dead CFG, and so we end up with a
+; dead block calling @unknown.
+; CHECK-NOT: Function Attrs: readnone
+; CHECK: define void @test3_h()
+define void @test3_h() {
+entry:
+ %g = call i1 @test3_g(i1 false)
+ br i1 %g, label %then, label %exit
+
+then:
+ call void @test3_maybe_unknown(i1 true)
+ br label %exit
+
+exit:
+ call void @test3_maybe_unknown(i1 false)
+ ret void
+}
--- /dev/null
+; RUN: opt < %s -passes='cgscc(inline)' -inline-threshold=0 -S | FileCheck %s
+
+; The 'test1_' prefixed functions test the basic 'last callsite' inline
+; threshold adjustment where we specifically inline the last call site of an
+; internal function regardless of cost.
+
+define internal void @test1_f() {
+entry:
+ %p = alloca i32
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ ret void
+}
+
+; Identical to @test1_f but doesn't get inlined because there is more than one
+; call. If this *does* get inlined, the body used both here and in @test1_f
+; isn't a good test for the threshold difference based on the last call.
+define internal void @test1_g() {
+entry:
+ %p = alloca i32
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ ret void
+}
+
+define void @test1() {
+; CHECK-LABEL: define void @test1()
+entry:
+ call void @test1_f()
+; CHECK-NOT: @test1_f
+
+ call void @test1_g()
+ call void @test1_g()
+; CHECK: call void @test1_g()
+; CHECK: call void @test1_g()
+
+ ret void
+}
+
+
+; The 'test2_' prefixed functions test that we can discover the last callsite
+; bonus after having inlined the prior call site. For this to work, we need
+; a callsite dependent cost so we have a trivial predicate guarding all the
+; cost, and set that in a particular direction.
+
+define internal void @test2_f(i1 %b) {
+entry:
+ %p = alloca i32
+ br i1 %b, label %then, label %exit
+
+then:
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ br label %exit
+
+exit:
+ ret void
+}
+
+; Identical to @test2_f but doesn't get inlined because there is more than one
+; call. If this *does* get inlined, the body used both here and in @test2_f
+; isn't a good test for the threshold difference based on the last call.
+define internal void @test2_g(i1 %b) {
+entry:
+ %p = alloca i32
+ br i1 %b, label %then, label %exit
+
+then:
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ br label %exit
+
+exit:
+ ret void
+}
+
+define void @test2() {
+; CHECK-LABEL: define void @test2()
+entry:
+ ; The first call is trivial to inline due to the argument.
+ call void @test2_f(i1 false)
+; CHECK-NOT: @test2_f
+
+ ; The second call is too expensive to inline unless we update the number of
+ ; calls after inlining the first.
+ call void @test2_f(i1 true)
+; CHECK-NOT: @test2_f
+
+ ; Sanity check that two calls with the hard predicate remain uninlined.
+ call void @test2_g(i1 true)
+ call void @test2_g(i1 true)
+; CHECK: call void @test2_g(i1 true)
+; CHECK: call void @test2_g(i1 true)
+
+ ret void
+}
+
+
+; The 'test3_' prefixed functions are similar to the 'test2_' functions, but
+; the relative order of the trivial and hard-to-inline callsites is reversed.
+; This checks that the order of calls isn't significant to whether we observe
+; the "last callsite" threshold difference when the next-to-last call gets
+; inlined.
+; FIXME: We don't currently catch this case.
+
+define internal void @test3_f(i1 %b) {
+entry:
+ %p = alloca i32
+ br i1 %b, label %then, label %exit
+
+then:
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ br label %exit
+
+exit:
+ ret void
+}
+
+; Identical to @test3_f but doesn't get inlined because there is more than one
+; call. If this *does* get inlined, the body used both here and in @test3_f
+; isn't a good test for the threshold difference based on the last call.
+define internal void @test3_g(i1 %b) {
+entry:
+ %p = alloca i32
+ br i1 %b, label %then, label %exit
+
+then:
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ br label %exit
+
+exit:
+ ret void
+}
+
+define void @test3() {
+; CHECK-LABEL: define void @test3()
+entry:
+ ; The first call is too expensive to inline unless we update the number of
+ ; calls after inlining the second.
+ call void @test3_f(i1 true)
+; FIXME: We should inline this call without iteration.
+; CHECK: call void @test3_f(i1 true)
+
+ ; But the second call is trivial to inline due to the argument.
+ call void @test3_f(i1 false)
+; CHECK-NOT: @test3_f
+
+ ; Sanity check that two calls with the hard predicate remain uninlined.
+ call void @test3_g(i1 true)
+ call void @test3_g(i1 true)
+; CHECK: call void @test3_g(i1 true)
+; CHECK: call void @test3_g(i1 true)
+
+ ret void
+}
+
+
+; The 'test4_' prefixed functions are similar to the 'test2_' prefixed
+; functions but include unusual constant expressions that make discovering that
+; a function is dead harder.
+
+define internal void @test4_f(i1 %b) {
+entry:
+ %p = alloca i32
+ br i1 %b, label %then, label %exit
+
+then:
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ br label %exit
+
+exit:
+ ret void
+}
+
+; Identical to @test4_f but doesn't get inlined because there is more than one
+; call. If this *does* get inlined, the body used both here and in @test4_f
+; isn't a good test for the threshold difference based on the last call.
+define internal void @test4_g(i1 %b) {
+entry:
+ %p = alloca i32
+ br i1 %b, label %then, label %exit
+
+then:
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ store volatile i32 0, i32* %p
+ br label %exit
+
+exit:
+ ret void
+}
+
+define void @test4() {
+; CHECK-LABEL: define void @test4()
+entry:
+ ; The first call is trivial to inline due to the argument. However, this
+ ; argument also uses the function being called as part of a complex
+ ; constant expression. Merely inlining and deleting the call isn't enough to
+ ; drop the use count here; we need to GC the dead constant expression as
+ ; well.
+ call void @test4_f(i1 icmp ne (i64 ptrtoint (void (i1)* @test4_f to i64), i64 ptrtoint(void (i1)* @test4_f to i64)))
+; CHECK-NOT: @test4_f
+
+ ; The second call is too expensive to inline unless we update the number of
+ ; calls after inlining the first.
+ call void @test4_f(i1 true)
+; CHECK-NOT: @test4_f
+
+ ; And check that a single call to a function which is used by a complex
+ ; constant expression cannot be inlined because the constant expression forms
+ ; a second use. If this part starts failing we need to use more complex
+ ; constant expressions to reference a particular function with them.
+ %sink = alloca i1
+ store volatile i1 icmp ne (i64 ptrtoint (void (i1)* @test4_g to i64), i64 ptrtoint(void (i1)* @test4_g to i64)), i1* %sink
+ call void @test4_g(i1 true)
+; CHECK: store volatile i1 false
+; CHECK: call void @test4_g(i1 true)
+
+ ret void
+}
; RUN: opt < %s -inline -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s
; Test that bar and bar2 are both inlined throughout and removed.
@A = weak global i32 0 ; <i32*> [#uses=1]
@B = weak global i32 0 ; <i32*> [#uses=1]