[PM] Provide an initial, minimal port of the inliner to the new pass manager.

author Chandler Carruth <chandlerc@gmail.com>

Tue, 20 Dec 2016 03:15:32 +0000 (03:15 +0000)

committer Chandler Carruth <chandlerc@gmail.com>

Tue, 20 Dec 2016 03:15:32 +0000 (03:15 +0000)
author Chandler Carruth <chandlerc@gmail.com>
Tue, 20 Dec 2016 03:15:32 +0000 (03:15 +0000)
committer Chandler Carruth <chandlerc@gmail.com>
Tue, 20 Dec 2016 03:15:32 +0000 (03:15 +0000)
diff --git a/include/llvm/Analysis/CGSCCPassManager.h b/include/llvm/Analysis/CGSCCPassManager.h

index 603f92bebde657cb3c6df3bedfb661d42cef24fe..8eaa771afce3e652c559704637813f80efc4509d 100644 (file)
--- a/include/llvm/Analysis/CGSCCPassManager.h
+++ b/include/llvm/Analysis/CGSCCPassManager.h
@@ -331,7 +331,9 @@ public:
                              InvalidSCCSet, nullptr,   nullptr};
  
      PreservedAnalyses PA = PreservedAnalyses::all();
-    for (LazyCallGraph::RefSCC &InitialRC : CG.postorder_ref_sccs()) {
+    for (auto RCI = CG.postorder_ref_scc_begin(),
+              RCE = CG.postorder_ref_scc_end();
+         RCI != RCE;) {
        assert(RCWorklist.empty() &&
               "Should always start with an empty RefSCC worklist");
        // The postorder_ref_sccs range we are walking is lazily constructed, so
@@ -342,7 +344,10 @@ public:
        // to update as the program is simplified and allows us to have greater
        // cache locality as forming a RefSCC touches all the parts of all the
        // functions within that RefSCC.
-      RCWorklist.insert(&InitialRC);
+      //
+      // We also eagerly increment the iterator to the next position because
+      // the CGSCC passes below may delete the current RefSCC.
+      RCWorklist.insert(&*RCI++);
  
        do {
          LazyCallGraph::RefSCC *RC = RCWorklist.pop_back_val();
@@ -419,6 +424,10 @@ public:
                dbgs() << "Re-running SCC passes after a refinement of the "
                          "current SCC: "
                       << *UR.UpdatedC << "\n";
+
+            // Note that both `C` and `RC` may at this point refer to deleted,
+            // invalid SCC and RefSCCs respectively. But we will short circuit
+            // the processing when we check them in the loop above.
            } while (UR.UpdatedC);
  
          } while (!CWorklist.empty());
diff --git a/include/llvm/Transforms/IPO/InlinerPass.h b/include/llvm/Transforms/IPO/Inliner.h

similarity index 60%

rename from include/llvm/Transforms/IPO/InlinerPass.h

rename to include/llvm/Transforms/IPO/Inliner.h

index de5f5d84579ee6189f39de292aa18c06d2a834a0..b3ca5156e388302177f85fddfc4f72d96ef4085c 100644 (file)
--- a/include/llvm/Transforms/IPO/InlinerPass.h
+++ b/include/llvm/Transforms/IPO/Inliner.h
@@ -1,4 +1,4 @@
-//===- InlinerPass.h - Code common to all inliners --------------*- C++ -*-===//
+//===- Inliner.h - Inliner pass and infrastructure --------------*- C++ -*-===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -6,19 +6,14 @@
  // License. See LICENSE.TXT for details.
  //
  //===----------------------------------------------------------------------===//
-//
-// This file defines a simple policy-based bottom-up inliner.  This file
-// implements all of the boring mechanics of the bottom-up inlining, while the
-// subclass determines WHAT to inline, which is the much more interesting
-// component.
-//
-//===----------------------------------------------------------------------===//
  
-#ifndef LLVM_TRANSFORMS_IPO_INLINERPASS_H
-#define LLVM_TRANSFORMS_IPO_INLINERPASS_H
+#ifndef LLVM_TRANSFORMS_IPO_INLINER_H
+#define LLVM_TRANSFORMS_IPO_INLINER_H
  
+#include "llvm/Analysis/CGSCCPassManager.h"
  #include "llvm/Analysis/CallGraphSCCPass.h"
  #include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/LazyCallGraph.h"
  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h"
  
@@ -29,13 +24,13 @@ class DataLayout;
  class InlineCost;
  class OptimizationRemarkEmitter;
  class ProfileSummaryInfo;
-template <class PtrType, unsigned SmallSize> class SmallPtrSet;
  
  /// This class contains all of the helper code which is used to perform the
-/// inlining operations that do not depend on the policy.
-struct Inliner : public CallGraphSCCPass {
-  explicit Inliner(char &ID);
-  explicit Inliner(char &ID, bool InsertLifetime);
+/// inlining operations that do not depend on the policy. It contains the core
+/// bottom-up inlining infrastructure that specific inliner passes use.
+struct LegacyInlinerBase : public CallGraphSCCPass {
+  explicit LegacyInlinerBase(char &ID);
+  explicit LegacyInlinerBase(char &ID, bool InsertLifetime);
  
    /// For this class, we declare that we require and preserve the call graph.
    /// If the derived class implements this method, it should always explicitly
@@ -82,6 +77,32 @@ protected:
    ImportedFunctionsInliningStatistics ImportedFunctionsStats;
  };
  
+/// The inliner pass for the new pass manager.
+///
+/// This pass wires together the inlining utilities and the inline cost
+/// analysis into a CGSCC pass. It considers every call in every function in
+/// the SCC and tries to inline if profitable. It can be tuned with a number of
+/// parameters to control what cost model is used and what tradeoffs are made
+/// when making the decision.
+///
+/// It should be noted that the legacy inliners do considerably more than this
+/// inliner pass does. They provide logic for manually merging allocas, and
+/// doing considerable DCE including the DCE of dead functions. This pass makes
+/// every attempt to be simpler. DCE of functions requires complex reasoning
+/// about comdat groups, etc. Instead, it is expected that other more focused
+/// passes be composed to achieve the same end result.
+class InlinerPass : public PassInfoMixin<InlinerPass> {
+public:
+  InlinerPass(InlineParams Params = getInlineParams())
+      : Params(std::move(Params)) {}
+
+  PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
+                        LazyCallGraph &CG, CGSCCUpdateResult &UR);
+
+private:
+  InlineParams Params;
+};
+
  } // End llvm namespace
  
  #endif
diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h

index edba2bc66dd522b79bd50f567014ba2894b01f04..5eeb8cf30695c0ef6f3aaedc4f49b88c5e29d415 100644 (file)
--- a/include/llvm/Transforms/Utils/Cloning.h
+++ b/include/llvm/Transforms/Utils/Cloning.h
@@ -194,9 +194,17 @@ public:
    /// inlined from the callee.  This is only filled in if CG is non-null.
    SmallVector<WeakVH, 8> InlinedCalls;
  
+  /// All of the new call sites inlined into the caller.
+  ///
+  /// 'InlineFunction' fills this in by scanning the inlined instructions, and
+  /// only if CG is null. If CG is non-null, the vector of value handles
+  /// `InlinedCalls` above is filled in instead.
+  SmallVector<CallSite, 8> InlinedCallSites;
+
    void reset() {
      StaticAllocas.clear();
      InlinedCalls.clear();
+    InlinedCallSites.clear();
    }
  };
  
@@ -210,6 +218,10 @@ public:
  /// exists in the instruction stream.  Similarly this will inline a recursive
  /// function by one level.
  ///
+/// Note that while this routine is allowed to clean up and optimize the
+/// *inlined* code to minimize the actual inserted code, it must not delete
+/// code in the caller as users of this routine may have pointers to
+/// instructions in the caller that need to remain stable.
  bool InlineFunction(CallInst *C, InlineFunctionInfo &IFI,
                      AAResults *CalleeAAR = nullptr, bool InsertLifetime = true);
  bool InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp

index 0228a1ba38f33c67bf91ff902f97750018f69f50..9b9faacd354c388d0159f71ba897cf44270b584b 100644 (file)
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -638,7 +638,7 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
  
    bool HotCallsite = false;
    uint64_t TotalWeight;
-  if (CS.getInstruction()->extractProfTotalWeight(TotalWeight) &&
+  if (PSI && CS.getInstruction()->extractProfTotalWeight(TotalWeight) &&
        PSI->isHotCount(TotalWeight)) {
      HotCallsite = true;
    }
@@ -647,14 +647,14 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
    // when it would increase the threshold and the caller does not need to
    // minimize its size.
    bool InlineHint = Callee.hasFnAttribute(Attribute::InlineHint) ||
-                    PSI->isFunctionEntryHot(&Callee);
+                    (PSI && PSI->isFunctionEntryHot(&Callee));
    if (InlineHint && !Caller->optForMinSize())
      Threshold = MaxIfValid(Threshold, Params.HintThreshold);
  
    if (HotCallsite && !Caller->optForMinSize())
      Threshold = MaxIfValid(Threshold, Params.HotCallSiteThreshold);
  
-  bool ColdCallee = PSI->isFunctionEntryCold(&Callee);
+  bool ColdCallee = PSI && PSI->isFunctionEntryCold(&Callee);
    // For cold callees, use the ColdThreshold knob if it is available and reduces
    // the threshold.
    if (ColdCallee)
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp

index ce523ae5793617dd5893dc38185fa5974b3a39e9..95d4b64dc87e5827d7b7ee00385e97d69569de1e 100644 (file)
--- a/lib/Passes/PassBuilder.cpp
+++ b/lib/Passes/PassBuilder.cpp
@@ -72,6 +72,7 @@
  #include "llvm/Transforms/IPO/GlobalOpt.h"
  #include "llvm/Transforms/IPO/GlobalSplit.h"
  #include "llvm/Transforms/IPO/InferFunctionAttrs.h"
+#include "llvm/Transforms/IPO/Inliner.h"
  #include "llvm/Transforms/IPO/Internalize.h"
  #include "llvm/Transforms/IPO/LowerTypeTests.h"
  #include "llvm/Transforms/IPO/PartialInlining.h"
diff --git a/lib/Passes/PassRegistry.def b/lib/Passes/PassRegistry.def

index a35e7e8caf14ede9c1b1d604e0505fff6824c84f..da5b5a2443453c9379639bb60319cc11350bb8d4 100644 (file)
--- a/lib/Passes/PassRegistry.def
+++ b/lib/Passes/PassRegistry.def
@@ -87,6 +87,7 @@ CGSCC_ANALYSIS("fam-proxy", FunctionAnalysisManagerCGSCCProxy())
  #endif
  CGSCC_PASS("invalidate<all>", InvalidateAllAnalysesPass())
  CGSCC_PASS("function-attrs", PostOrderFunctionAttrsPass())
+CGSCC_PASS("inline", InlinerPass())
  CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass())
  #undef CGSCC_PASS
  
diff --git a/lib/Transforms/IPO/AlwaysInliner.cpp b/lib/Transforms/IPO/AlwaysInliner.cpp

index de059b656636d0eb7092c05733e2ac5aafd2be15..304694f9cef7116ed98b5844e88d762e561d726a 100644 (file)
--- a/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -26,7 +26,8 @@
  #include "llvm/IR/IntrinsicInst.h"
  #include "llvm/IR/Module.h"
  #include "llvm/IR/Type.h"
-#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/Inliner.h"
  #include "llvm/Transforms/Utils/Cloning.h"
  
  using namespace llvm;
@@ -62,14 +63,15 @@ namespace {
  ///
  /// Unlike the \c AlwaysInlinerPass, this uses the more heavyweight \c Inliner
  /// base class to provide several facilities such as array alloca merging.
-class AlwaysInlinerLegacyPass : public Inliner {
+class AlwaysInlinerLegacyPass : public LegacyInlinerBase {
  
  public:
-  AlwaysInlinerLegacyPass() : Inliner(ID, /*InsertLifetime*/ true) {
+  AlwaysInlinerLegacyPass() : LegacyInlinerBase(ID, /*InsertLifetime*/ true) {
      initializeAlwaysInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
    }
  
-  AlwaysInlinerLegacyPass(bool InsertLifetime) : Inliner(ID, InsertLifetime) {
+  AlwaysInlinerLegacyPass(bool InsertLifetime)
+      : LegacyInlinerBase(ID, InsertLifetime) {
      initializeAlwaysInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
    }
  
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp

index ef0465883f008a3122352de611f0fef16e62b96c..1770445b413f63f2e25d00208937e957eab3270b 100644 (file)
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -25,7 +25,7 @@
  #include "llvm/IR/Module.h"
  #include "llvm/IR/Type.h"
  #include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/Transforms/IPO/Inliner.h"
  
  using namespace llvm;
  
@@ -38,16 +38,17 @@ namespace {
  /// The common implementation of the inlining logic is shared between this
  /// inliner pass and the always inliner pass. The two passes use different cost
  /// analyses to determine when to inline.
-class SimpleInliner : public Inliner {
+class SimpleInliner : public LegacyInlinerBase {
  
    InlineParams Params;
  
  public:
-  SimpleInliner() : Inliner(ID), Params(llvm::getInlineParams()) {
+  SimpleInliner() : LegacyInlinerBase(ID), Params(llvm::getInlineParams()) {
      initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
    }
  
-  explicit SimpleInliner(InlineParams Params) : Inliner(ID), Params(Params) {
+  explicit SimpleInliner(InlineParams Params)
+      : LegacyInlinerBase(ID), Params(Params) {
      initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
    }
  
@@ -101,10 +102,10 @@ Pass *llvm::createFunctionInliningPass(InlineParams &Params) {
  
  bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) {
    TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
-  return Inliner::runOnSCC(SCC);
+  return LegacyInlinerBase::runOnSCC(SCC);
  }
  
  void SimpleInliner::getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequired<TargetTransformInfoWrapperPass>();
-  Inliner::getAnalysisUsage(AU);
+  LegacyInlinerBase::getAnalysisUsage(AU);
  }
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp

index cc420a95c58f6bc95698a44a5f5e8dec7fb6e1b0..baef3086f7d4fa9a9f28cfadd8356064e422c03f 100644 (file)
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -13,6 +13,7 @@
  //
  //===----------------------------------------------------------------------===//
  
+#include "llvm/Transforms/IPO/Inliner.h"
  #include "llvm/ADT/SmallPtrSet.h"
  #include "llvm/ADT/Statistic.h"
  #include "llvm/Analysis/AliasAnalysis.h"
@@ -26,12 +27,12 @@
  #include "llvm/IR/CallSite.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/InstIterator.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/IntrinsicInst.h"
  #include "llvm/IR/Module.h"
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/IPO/InlinerPass.h"
  #include "llvm/Transforms/Utils/Cloning.h"
  #include "llvm/Transforms/Utils/Local.h"
  using namespace llvm;
@@ -76,15 +77,16 @@ cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats(
      cl::Hidden, cl::desc("Enable inliner stats for imported functions"));
  } // namespace
  
-Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InsertLifetime(true) {}
+LegacyInlinerBase::LegacyInlinerBase(char &ID)
+    : CallGraphSCCPass(ID), InsertLifetime(true) {}
  
-Inliner::Inliner(char &ID, bool InsertLifetime)
+LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime)
      : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {}
  
  /// For this class, we declare that we require and preserve the call graph.
  /// If the derived class implements this method, it should
  /// always explicitly call the implementation here.
-void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
+void LegacyInlinerBase::getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
@@ -409,13 +411,13 @@ static bool InlineHistoryIncludes(
    return false;
  }
  
-bool Inliner::doInitialization(CallGraph &CG) {
+bool LegacyInlinerBase::doInitialization(CallGraph &CG) {
    if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
      ImportedFunctionsStats.setModuleInfo(CG.getModule());
    return false; // No changes to CallGraph.
  }
  
-bool Inliner::runOnSCC(CallGraphSCC &SCC) {
+bool LegacyInlinerBase::runOnSCC(CallGraphSCC &SCC) {
    if (skipSCC(SCC))
      return false;
    return inlineCalls(SCC);
@@ -630,7 +632,7 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
    return Changed;
  }
  
-bool Inliner::inlineCalls(CallGraphSCC &SCC) {
+bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) {
    CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
    ACT = &getAnalysis<AssumptionCacheTracker>();
    PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
@@ -655,7 +657,7 @@ bool Inliner::inlineCalls(CallGraphSCC &SCC) {
  
  /// Remove now-dead linkonce functions at the end of
  /// processing to avoid breaking the SCC traversal.
-bool Inliner::doFinalization(CallGraph &CG) {
+bool LegacyInlinerBase::doFinalization(CallGraph &CG) {
    if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
      ImportedFunctionsStats.dump(InlinerFunctionImportStats ==
                                  InlinerFunctionImportStatsOpts::Verbose);
@@ -663,7 +665,8 @@ bool Inliner::doFinalization(CallGraph &CG) {
  }
  
  /// Remove dead functions that are not included in DNR (Do Not Remove) list.
-bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
+bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG,
+                                            bool AlwaysInlineOnly) {
    SmallVector<CallGraphNode *, 16> FunctionsToRemove;
    SmallVector<CallGraphNode *, 16> DeadFunctionsInComdats;
    SmallDenseMap<const Comdat *, int, 16> ComdatEntriesAlive;
@@ -765,3 +768,171 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
    }
    return true;
  }
+
+PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
+                                   CGSCCAnalysisManager &AM, LazyCallGraph &CG,
+                                   CGSCCUpdateResult &UR) {
+  FunctionAnalysisManager &FAM =
+      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG)
+          .getManager();
+  const ModuleAnalysisManager &MAM =
+      AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG).getManager();
+  bool Changed = false;
+
+  assert(InitialC.size() > 0 && "Cannot handle an empty SCC!");
+  Module &M = *InitialC.begin()->getFunction().getParent();
+  ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M);
+
+  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
+      [&](Function &F) -> AssumptionCache & {
+    return FAM.getResult<AssumptionAnalysis>(F);
+  };
+
+  // Setup the data structure used to plumb customization into the
+  // `InlineFunction` routine.
+  InlineFunctionInfo IFI(/*cg=*/nullptr);
+
+  auto GetInlineCost = [&](CallSite CS) {
+    Function &Callee = *CS.getCalledFunction();
+    auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
+    return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, PSI);
+  };
+
+  // We use a worklist of nodes to process so that we can handle if the SCC
+  // structure changes and some nodes are no longer part of the current SCC. We
+  // also need to use an updatable pointer for the SCC as a consequence.
+  SmallVector<LazyCallGraph::Node *, 16> Nodes;
+  for (auto &N : InitialC)
+    Nodes.push_back(&N);
+  auto *C = &InitialC;
+  auto *RC = &C->getOuterRefSCC();
+
+  // We also use a secondary worklist of call sites within a particular node to
+  // allow quickly continuing to inline through newly inlined call sites where
+  // possible.
+  SmallVector<CallSite, 16> Calls;
+
+  // Track a set vector of inlined callees so that we can augment the caller
+  // with all of their edges in the call graph before pruning out the ones that
+  // got simplified away.
+  SmallSetVector<Function *, 4> InlinedCallees;
+
+  // Track the dead functions to delete once finished with inlining calls. We
+  // defer deleting these to make it easier to handle the call graph updates.
+  SmallVector<Function *, 4> DeadFunctions;
+
+  do {
+    auto &N = *Nodes.pop_back_val();
+    if (CG.lookupSCC(N) != C)
+      continue;
+    Function &F = N.getFunction();
+    if (F.hasFnAttribute(Attribute::OptimizeNone))
+      continue;
+
+    // Get the remarks emission analysis for the caller.
+    auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+
+    // We want to generally process call sites top-down in order for
+    // simplifications stemming from replacing the call with the returned value
+    // after inlining to be visible to subsequent inlining decisions. So we
+    // walk the function backwards and then process the back of the vector.
+    // FIXME: Using reverse is a really bad way to do this. Instead we should
+    // do an actual PO walk of the function body.
+    for (Instruction &I : reverse(instructions(F)))
+      if (auto CS = CallSite(&I))
+        if (Function *Callee = CS.getCalledFunction())
+          if (!Callee->isDeclaration())
+            Calls.push_back(CS);
+
+    bool DidInline = false;
+    while (!Calls.empty()) {
+      CallSite CS = Calls.pop_back_val();
+      Function &Callee = *CS.getCalledFunction();
+
+      // Check whether we want to inline this callsite.
+      if (!shouldInline(CS, GetInlineCost, ORE))
+        continue;
+
+      if (!InlineFunction(CS, IFI))
+        continue;
+      DidInline = true;
+      InlinedCallees.insert(&Callee);
+
+      // Add any new callsites to defined functions to the worklist.
+      for (CallSite &CS : reverse(IFI.InlinedCallSites))
+        if (Function *NewCallee = CS.getCalledFunction())
+          if (!NewCallee->isDeclaration())
+            Calls.push_back(CS);
+
+      // For local functions, check whether this makes the callee trivially
+      // dead. In that case, we can drop the body of the function eagerly
+      // which may reduce the number of callers of other functions to one,
+      // changing inline cost thresholds.
+      if (Callee.hasLocalLinkage()) {
+        // To check this we also need to nuke any dead constant uses (perhaps
+        // made dead by this operation on other functions).
+        Callee.removeDeadConstantUsers();
+        if (Callee.use_empty()) {
+          // Clear the body and queue the function itself for deletion when we
+          // finish inlining and call graph updates.
+          // Note that after this point, it is an error to do anything other
+          // than use the callee's address or delete it.
+          Callee.dropAllReferences();
+          assert(find(DeadFunctions, &Callee) == DeadFunctions.end() &&
+                 "Cannot put cause a function to become dead twice!");
+          DeadFunctions.push_back(&Callee);
+        }
+      }
+    }
+
+    if (!DidInline)
+      continue;
+    Changed = true;
+
+    // Add all the inlined callees' edges to the caller. These are by
+    // definition trivial edges as we already had a transitive call edge to the
+    // callee.
+    for (Function *InlinedCallee : InlinedCallees) {
+      LazyCallGraph::Node &CalleeN = *CG.lookup(*InlinedCallee);
+      for (LazyCallGraph::Edge &E : CalleeN)
+        if (E.isCall())
+          RC->insertTrivialCallEdge(N, *E.getNode());
+        else
+          RC->insertTrivialRefEdge(N, *E.getNode());
+    }
+    InlinedCallees.clear();
+
+    // At this point, since we have made changes we have at least removed
+    // a call instruction. However, in the process we do some incremental
+    // simplification of the surrounding code. This simplification can
+    // essentially do all of the same things as a function pass and we can
+    // re-use the exact same logic for updating the call graph to reflect the
+    // change.
+    C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR);
+    RC = &C->getOuterRefSCC();
+  } while (!Nodes.empty());
+
+  // Now that we've finished inlining all of the calls across this SCC, delete
+  // all of the trivially dead functions, updating the call graph and the CGSCC
+  // pass manager in the process.
+  //
+  // Note that this walks a vector in the order the functions became dead,
+  // but the order is irrelevant as all we do is delete things and add
+  // pointers to unordered sets.
+  for (Function *DeadF : DeadFunctions) {
+    // Get the necessary information out of the call graph and nuke the
+    // function there.
+    auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF));
+    auto &DeadRC = DeadC.getOuterRefSCC();
+    CG.removeDeadFunction(*DeadF);
+
+    // Mark the relevant parts of the call graph as invalid so we don't visit
+    // them.
+    UR.InvalidatedSCCs.insert(&DeadC);
+    UR.InvalidatedRefSCCs.insert(&DeadRC);
+
+    // And delete the actual function from the module.
+    M.getFunctionList().erase(DeadF);
+  }
+  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp

index ee083f91c7a04662367731256e01d890e28c67dc..4ab8450f35ec26318bdca63670e9ac61e62ffb6c 100644 (file)
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -1644,8 +1644,16 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
      }
  
      // Update the callgraph if requested.
-    if (IFI.CG)
+    if (IFI.CG) {
        UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
+    } else {
+      // Otherwise just collect the raw call sites that were inlined.
+      for (BasicBlock &NewBB :
+           make_range(FirstNewBlock->getIterator(), Caller->end()))
+        for (Instruction &I : NewBB)
+          if (auto CS = CallSite(&I))
+            IFI.InlinedCallSites.push_back(CS);
+    }
  
      // For 'nodebug' functions, the associated DISubprogram is always null.
      // Conservatively avoid propagating the callsite debug location to
diff --git a/test/Transforms/Inline/basictest.ll b/test/Transforms/Inline/basictest.ll

index 5847e623831fc1c360d0e6aa2669293fe1f8303b..b98644cd2dd446d2b65bb8d0d6ee54f3e9343eed 100644 (file)
--- a/test/Transforms/Inline/basictest.ll
+++ b/test/Transforms/Inline/basictest.ll
@@ -1,4 +1,5 @@
  ; RUN: opt < %s -inline -sroa -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(inline,function(sroa))' -S | FileCheck %s
  target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
  
  define i32 @test1f(i32 %i) {
diff --git a/test/Transforms/Inline/cgscc-update.ll b/test/Transforms/Inline/cgscc-update.ll

new file mode 100644 (file)

index 0000000..cf1ac4d
--- /dev/null
+++ b/test/Transforms/Inline/cgscc-update.ll
@@ -0,0 +1,145 @@
+; RUN: opt < %s -aa-pipeline=basic-aa -passes='cgscc(function-attrs,inline)' -S | FileCheck %s
+; This test runs the inliner and the function attribute deduction. It ensures
+; that when the inliner mutates the call graph it correctly updates the CGSCC
+; iteration so that we can compute refined function attributes. In this way it
+; is leveraging function attribute computation to observe correct call graph
+; updates.
+
+; Boring unknown external function call.
+; CHECK: declare void @unknown()
+declare void @unknown()
+
+; Sanity check: this should get annotated as readnone.
+; CHECK: Function Attrs: readnone
+; CHECK-NEXT: declare void @readnone()
+declare void @readnone() readnone
+
+; The 'test1_' prefixed functions are designed to trigger forming a new direct
+; call in the inlined body of the function. After that, we form a new SCC and
+; using that can deduce precise function attrs.
+
+; This function should no longer exist (neither a definition nor any call).
+; CHECK-NOT: @test1_f(
+define internal void @test1_f(void()* %p) {
+entry:
+  call void %p()
+  ret void
+}
+
+; This function should have had 'readnone' deduced for its SCC.
+; CHECK: Function Attrs: noinline readnone
+; CHECK-NEXT: define void @test1_g()
+define void @test1_g() noinline {
+entry:
+  call void @test1_f(void()* @test1_h)
+  ret void
+}
+
+; This function should have had 'readnone' deduced for its SCC.
+; CHECK: Function Attrs: noinline readnone
+; CHECK-NEXT: define void @test1_h()
+define void @test1_h() noinline {
+entry:
+  call void @test1_g()
+  call void @readnone()
+  ret void
+}
+
+
+; The 'test2_' prefixed functions are designed to trigger forming a new direct
+; call due to RAUW-ing the returned value of a called function into the caller.
+; This too should form a new SCC which can then be reasoned about to compute
+; precise function attrs.
+
+; This function should no longer exist.
+; CHECK-NOT: @test2_f()
+define internal void()* @test2_f() {
+entry:
+  ret void()* @test2_h
+}
+
+; This function should have had 'readnone' deduced for its SCC.
+; CHECK: Function Attrs: noinline readnone
+; CHECK-NEXT: define void @test2_g()
+define void @test2_g() noinline {
+entry:
+  %p = call void()* @test2_f()
+  call void %p()
+  ret void
+}
+
+; This function should have had 'readnone' deduced for its SCC.
+; CHECK: Function Attrs: noinline readnone
+; CHECK-NEXT: define void @test2_h()
+define void @test2_h() noinline {
+entry:
+  call void @test2_g()
+  call void @readnone()
+  ret void
+}
+
+
+; The 'test3_' prefixed functions are designed to inline in a way that causes
+; call sites to become trivially dead during the middle of inlining callsites of
+; a single function to make sure that the inliner does not get confused by this
+; pattern.
+
+; CHECK-NOT: @test3_maybe_unknown(
+define internal void @test3_maybe_unknown(i1 %b) {
+entry:
+  br i1 %b, label %then, label %exit
+
+then:
+  call void @unknown()
+  br label %exit
+
+exit:
+  ret void
+}
+
+; CHECK-NOT: @test3_f(
+define internal i1 @test3_f() {
+entry:
+  ret i1 false
+}
+
+; CHECK-NOT: @test3_g(
+define internal i1 @test3_g(i1 %b) {
+entry:
+  br i1 %b, label %then1, label %if2
+
+then1:
+  call void @test3_maybe_unknown(i1 true)
+  br label %if2
+
+if2:
+  %f = call i1 @test3_f()
+  br i1 %f, label %then2, label %exit
+
+then2:
+  call void @test3_maybe_unknown(i1 true)
+  br label %exit
+
+exit:
+  ret i1 false
+}
+
+; FIXME: Currently the inliner doesn't successfully mark this as readnone
+; because while it simplifies trivially dead CFGs when inlining callees it
+; doesn't simplify the caller's trivially dead CFG and so we end with a dead
+; block calling @unknown.
+; CHECK-NOT: Function Attrs: readnone
+; CHECK: define void @test3_h()
+define void @test3_h() {
+entry:
+  %g = call i1 @test3_g(i1 false)
+  br i1 %g, label %then, label %exit
+
+then:
+  call void @test3_maybe_unknown(i1 true)
+  br label %exit
+
+exit:
+  call void @test3_maybe_unknown(i1 false)
+  ret void
+}
diff --git a/test/Transforms/Inline/last-callsite.ll b/test/Transforms/Inline/last-callsite.ll

new file mode 100644 (file)

index 0000000..b0fe7d9
--- /dev/null
+++ b/test/Transforms/Inline/last-callsite.ll
@@ -0,0 +1,269 @@
+; RUN: opt < %s -passes='cgscc(inline)' -inline-threshold=0 -S | FileCheck %s
+
+; The 'test1_' prefixed functions test the basic 'last callsite' inline
+; threshold adjustment where we specifically inline the last call site of an
+; internal function regardless of cost.
+
+define internal void @test1_f() {
+entry:
+  %p = alloca i32
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  ret void
+}
+
+; Identical to @test1_f but doesn't get inlined because there is more than one
+; call. If this *does* get inlined, the body used both here and in @test1_f
+; isn't a good test for different threshold based on the last call.
+define internal void @test1_g() {
+entry:
+  %p = alloca i32
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  ret void
+}
+
+define void @test1() {
+; CHECK-LABEL: define void @test1()
+entry:
+  call void @test1_f()
+; CHECK-NOT: @test1_f
+
+  call void @test1_g()
+  call void @test1_g()
+; CHECK: call void @test1_g()
+; CHECK: call void @test1_g()
+
+  ret void
+}
+
+
+; The 'test2_' prefixed functions test that we can discover the last callsite
+; bonus after having inlined the prior call site. For this to work, we need
+; a callsite dependent cost so we have a trivial predicate guarding all the
+; cost, and set that in a particular direction.
+
+define internal void @test2_f(i1 %b) {
+entry:
+  %p = alloca i32
+  br i1 %b, label %then, label %exit
+
+then:
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Identical to @test2_f but doesn't get inlined because there is more than one
+; call. If this *does* get inlined, the body used both here and in @test2_f
+; isn't a good test for different threshold based on the last call.
+define internal void @test2_g(i1 %b) {
+entry:
+  %p = alloca i32
+  br i1 %b, label %then, label %exit
+
+then:
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  br label %exit
+
+exit:
+  ret void
+}
+
+define void @test2() {
+; CHECK-LABEL: define void @test2()
+entry:
+  ; The first call is trivial to inline due to the argument.
+  call void @test2_f(i1 false)
+; CHECK-NOT: @test2_f
+
+  ; The second call is too expensive to inline unless we update the number of
+  ; calls after inlining the first.
+  call void @test2_f(i1 true)
+; CHECK-NOT: @test2_f
+
+  ; Sanity check that two calls with the hard predicate remain uninlined.
+  call void @test2_g(i1 true)
+  call void @test2_g(i1 true)
+; CHECK: call void @test2_g(i1 true)
+; CHECK: call void @test2_g(i1 true)
+
+  ret void
+}
+
+
+; The 'test3_' prefixed functions are similar to the 'test2_' functions but the
+; relative order of the trivial and hard to inline callsites is reversed. This
+; checks that the order of calls isn't significant to whether we observe the
+; "last callsite" threshold difference because the next-to-last gets inlined.
+; FIXME: We don't currently catch this case.
+
+define internal void @test3_f(i1 %b) {
+entry:
+  %p = alloca i32
+  br i1 %b, label %then, label %exit
+
+then:
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Identical to @test3_f but doesn't get inlined because there is more than one
+; call. If this *does* get inlined, the body used both here and in @test3_f
+; isn't a good test for different threshold based on the last call.
+define internal void @test3_g(i1 %b) {
+entry:
+  %p = alloca i32
+  br i1 %b, label %then, label %exit
+
+then:
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  br label %exit
+
+exit:
+  ret void
+}
+
+define void @test3() {
+; CHECK-LABEL: define void @test3()
+entry:
+  ; The first call is too expensive to inline unless we update the number of
+  ; calls after inlining the second.
+  call void @test3_f(i1 true)
+; FIXME: We should inline this call without iteration.
+; CHECK: call void @test3_f(i1 true)
+
+  ; But the second call is trivial to inline due to the argument.
+  call void @test3_f(i1 false)
+; CHECK-NOT: @test3_f
+
+  ; Sanity check that two calls with the hard predicate remain uninlined.
+  call void @test3_g(i1 true)
+  call void @test3_g(i1 true)
+; CHECK: call void @test3_g(i1 true)
+; CHECK: call void @test3_g(i1 true)
+
+  ret void
+}
+
+
+; The 'test4_' prefixed functions are similar to the 'test2_' prefixed
+; functions but include unusual constant expressions that make discovering that
+; a function is dead harder.
+
+define internal void @test4_f(i1 %b) {
+entry:
+  %p = alloca i32
+  br i1 %b, label %then, label %exit
+
+then:
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Identical to @test4_f but doesn't get inlined because there is more than one
+; call. If this *does* get inlined, the body used both here and in @test4_f
+; isn't a good test for different threshold based on the last call.
+define internal void @test4_g(i1 %b) {
+entry:
+  %p = alloca i32
+  br i1 %b, label %then, label %exit
+
+then:
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  store volatile i32 0, i32* %p
+  br label %exit
+
+exit:
+  ret void
+}
+
+define void @test4() {
+; CHECK-LABEL: define void @test4()
+entry:
+  ; The first call is trivial to inline due to the argument. However this
+  ; argument also uses the function being called as part of a complex
+  ; constant expression. Merely inlining and deleting the call isn't enough to
+  ; drop the use count here, we need to GC the dead constant expression as
+  ; well.
+  call void @test4_f(i1 icmp ne (i64 ptrtoint (void (i1)* @test4_f to i64), i64 ptrtoint(void (i1)* @test4_f to i64)))
+; CHECK-NOT: @test4_f
+
+  ; The second call is too expensive to inline unless we update the number of
+  ; calls after inlining the first.
+  call void @test4_f(i1 true)
+; CHECK-NOT: @test4_f
+
+  ; And check that a single call to a function which is used by a complex
+  ; constant expression cannot be inlined because the constant expression forms
+  ; a second use. If this part starts failing we need to use more complex
+  ; constant expressions to reference a particular function with them.
+  %sink = alloca i1
+  store volatile i1 icmp ne (i64 ptrtoint (void (i1)* @test4_g to i64), i64 ptrtoint(void (i1)* @test4_g to i64)), i1* %sink
+  call void @test4_g(i1 true)
+; CHECK: store volatile i1 false
+; CHECK: call void @test4_g(i1 true)
+
+  ret void
+}
diff --git a/test/Transforms/Inline/nested-inline.ll b/test/Transforms/Inline/nested-inline.ll

index 9d08ac0c19c62b71992e9629c1877589fce043e8..7a207f6a3f067db5ba40a743925b4ed8739c6bda 100644 (file)
--- a/test/Transforms/Inline/nested-inline.ll
+++ b/test/Transforms/Inline/nested-inline.ll
@@ -1,4 +1,5 @@
  ; RUN: opt < %s -inline -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s
  ; Test that bar and bar2 are both inlined throughout and removed.
  @A = weak global i32 0         ; <i32*> [#uses=1]
  @B = weak global i32 0         ; <i32*> [#uses=1]
author	Chandler Carruth <chandlerc@gmail.com>
	Tue, 20 Dec 2016 03:15:32 +0000 (03:15 +0000)
committer	Chandler Carruth <chandlerc@gmail.com>
	Tue, 20 Dec 2016 03:15:32 +0000 (03:15 +0000)
include/llvm/Analysis/CGSCCPassManager.h		patch \| blob \| history
include/llvm/Transforms/IPO/Inliner.h	[moved from include/llvm/Transforms/IPO/InlinerPass.h with 60% similarity]	patch \| blob \| history
include/llvm/Transforms/Utils/Cloning.h		patch \| blob \| history
lib/Analysis/InlineCost.cpp		patch \| blob \| history
lib/Passes/PassBuilder.cpp		patch \| blob \| history
lib/Passes/PassRegistry.def		patch \| blob \| history
lib/Transforms/IPO/AlwaysInliner.cpp		patch \| blob \| history
lib/Transforms/IPO/InlineSimple.cpp		patch \| blob \| history
lib/Transforms/IPO/Inliner.cpp		patch \| blob \| history
lib/Transforms/Utils/InlineFunction.cpp		patch \| blob \| history
test/Transforms/Inline/basictest.ll		patch \| blob \| history
test/Transforms/Inline/cgscc-update.ll	[new file with mode: 0644]	patch \| blob
test/Transforms/Inline/last-callsite.ll	[new file with mode: 0644]	patch \| blob
test/Transforms/Inline/nested-inline.ll		patch \| blob \| history