From 7d6a3b6798102bae358f91baf9e6b44ab796332b Mon Sep 17 00:00:00 2001 From: Matthew Simpson <mssimpso@codeaurora.org> Date: Wed, 25 Oct 2017 13:40:08 +0000 Subject: [PATCH] Add CalledValuePropagation pass This patch adds a new pass for attaching !callees metadata to indirect call sites. The pass propagates values to call sites by performing an IPSCCP-like analysis using the generic sparse propagation solver. For indirect call sites having a small set of possible callees, the attached metadata indicates what those callees are. The metadata can be used to facilitate optimizations like intersecting the function attributes of the possible callees, refining the call graph, performing indirect call promotion, etc. Differential Revision: https://reviews.llvm.org/D37355 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@316576 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm-c/Transforms/IPO.h | 3 + include/llvm/InitializePasses.h | 1 + include/llvm/LinkAllPasses.h | 1 + include/llvm/Transforms/IPO.h | 4 + .../Transforms/IPO/CalledValuePropagation.h | 35 ++ lib/Passes/PassBuilder.cpp | 9 + lib/Passes/PassRegistry.def | 1 + lib/Transforms/IPO/CMakeLists.txt | 1 + lib/Transforms/IPO/CalledValuePropagation.cpp | 422 ++++++++++++++++++ lib/Transforms/IPO/IPO.cpp | 5 + lib/Transforms/IPO/PassManagerBuilder.cpp | 5 + test/Other/new-pm-defaults.ll | 1 + test/Other/new-pm-lto-defaults.ll | 1 + test/Other/new-pm-thinlto-defaults.ll | 1 + .../simple-arguments.ll | 83 ++++ .../CalledValuePropagation/simple-memory.ll | 62 +++ .../CalledValuePropagation/simple-select.ll | 39 ++ 17 files changed, 674 insertions(+) create mode 100644 include/llvm/Transforms/IPO/CalledValuePropagation.h create mode 100644 lib/Transforms/IPO/CalledValuePropagation.cpp create mode 100644 test/Transforms/CalledValuePropagation/simple-arguments.ll create mode 100644 test/Transforms/CalledValuePropagation/simple-memory.ll create mode 100644 test/Transforms/CalledValuePropagation/simple-select.ll diff --git a/include/llvm-c/Transforms/IPO.h b/include/llvm-c/Transforms/IPO.h index 3af7425dd26..7705b1864dc 100644 --- a/include/llvm-c/Transforms/IPO.h +++ b/include/llvm-c/Transforms/IPO.h @@ -34,6 +34,9 @@ void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM); /** See llvm::createConstantMergePass function. */ void LLVMAddConstantMergePass(LLVMPassManagerRef PM); +/** See llvm::createCalledValuePropagationPass function. */ +void LLVMAddCalledValuePropagationPass(LLVMPassManagerRef PM); + /** See llvm::createDeadArgEliminationPass function. */ void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM); diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index bf54b6471f4..6b0e6acadad 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -93,6 +93,7 @@ void initializeCallGraphViewerPass(PassRegistry&); void initializeCallGraphWrapperPassPass(PassRegistry&); void initializeCodeGenPreparePass(PassRegistry&); void initializeConstantHoistingLegacyPassPass(PassRegistry&); +void initializeCalledValuePropagationLegacyPassPass(PassRegistry &); void initializeConstantMergeLegacyPassPass(PassRegistry&); void initializeConstantPropagationPass(PassRegistry&); void initializeCorrelatedValuePropagationPass(PassRegistry&); diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h index 29314617177..abc3bac9367 100644 --- a/include/llvm/LinkAllPasses.h +++ b/include/llvm/LinkAllPasses.h @@ -80,6 +80,7 @@ namespace { (void) llvm::createCFLSteensAAWrapperPass(); (void) llvm::createStructurizeCFGPass(); (void) llvm::createLibCallsShrinkWrapPass(); + (void) llvm::createCalledValuePropagationPass(); (void) llvm::createConstantMergePass(); (void) llvm::createConstantPropagationPass(); (void) llvm::createCostModelAnalysisPass(); diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h index 39ceb19525b..ce20a726b78 100644 --- a/include/llvm/Transforms/IPO.h +++ b/include/llvm/Transforms/IPO.h @@ -216,6 +216,10 @@ ModulePass *createMetaRenamerPass(); /// manager. ModulePass *createBarrierNoopPass(); +/// createCalledValuePropagationPass - Attach metadata to indirct call sites +/// indicating the set of functions they may target at run-time. +ModulePass *createCalledValuePropagationPass(); + /// What to do with the summary when running passes that operate on it. enum class PassSummaryAction { None, ///< Do nothing. diff --git a/include/llvm/Transforms/IPO/CalledValuePropagation.h b/include/llvm/Transforms/IPO/CalledValuePropagation.h new file mode 100644 index 00000000000..352bdc7ac17 --- /dev/null +++ b/include/llvm/Transforms/IPO/CalledValuePropagation.h @@ -0,0 +1,35 @@ +//===- CalledValuePropagation.h - Propagate called values -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a transformation that attaches !callees metadata to +// indirect call sites. For a given call site, the metadata, if present, +// indicates the set of functions the call site could possibly target at +// run-time. This metadata is added to indirect call sites when the set of +// possible targets can be determined by analysis and is known to be small. The +// analysis driving the transformation is similar to constant propagation and +// makes uses of the generic sparse propagation solver. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_CALLEDVALUEPROPAGATION_H +#define LLVM_TRANSFORMS_IPO_CALLEDVALUEPROPAGATION_H + +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class CalledValuePropagationPass + : public PassInfoMixin<CalledValuePropagationPass> { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &); +}; +} // namespace llvm + +#endif // LLVM_TRANSFORMS_IPO_CALLEDVALUEPROPAGATION_H diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp index 219c3835f47..8796ff56e5e 100644 --- a/lib/Passes/PassBuilder.cpp +++ b/lib/Passes/PassBuilder.cpp @@ -63,6 +63,7 @@ #include "llvm/Transforms/GCOVProfiler.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/ArgumentPromotion.h" +#include "llvm/Transforms/IPO/CalledValuePropagation.h" #include "llvm/Transforms/IPO/ConstantMerge.h" #include "llvm/Transforms/IPO/CrossDSOCFI.h" #include "llvm/Transforms/IPO/DeadArgumentElimination.h" @@ -580,6 +581,10 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // years, it should be re-analyzed. MPM.addPass(IPSCCPPass()); + // Attach metadata to indirect call sites indicating the set of functions + // they may target at run-time. This should follow IPSCCP. + MPM.addPass(CalledValuePropagationPass()); + // Optimize globals to try and fold them into constants. MPM.addPass(GlobalOptPass()); @@ -921,6 +926,10 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // opens opportunities for globalopt (and inlining) by substituting function // pointers passed as arguments to direct uses of functions. MPM.addPass(IPSCCPPass()); + + // Attach metadata to indirect call sites indicating the set of functions + // they may target at run-time. This should follow IPSCCP. + MPM.addPass(CalledValuePropagationPass()); } // Now deduce any function attributes based in the current code. diff --git a/lib/Passes/PassRegistry.def b/lib/Passes/PassRegistry.def index bfe3dd782c1..20d1220ac33 100644 --- a/lib/Passes/PassRegistry.def +++ b/lib/Passes/PassRegistry.def @@ -39,6 +39,7 @@ MODULE_ALIAS_ANALYSIS("globals-aa", GlobalsAA()) #define MODULE_PASS(NAME, CREATE_PASS) #endif MODULE_PASS("always-inline", AlwaysInlinerPass()) +MODULE_PASS("called-value-propagation", CalledValuePropagationPass()) MODULE_PASS("constmerge", ConstantMergePass()) MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass()) MODULE_PASS("deadargelim", DeadArgumentEliminationPass()) diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index 67f18a307b9..397561746f8 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -2,6 +2,7 @@ add_llvm_library(LLVMipo AlwaysInliner.cpp ArgumentPromotion.cpp BarrierNoopPass.cpp + CalledValuePropagation.cpp ConstantMerge.cpp CrossDSOCFI.cpp DeadArgumentElimination.cpp diff --git a/lib/Transforms/IPO/CalledValuePropagation.cpp b/lib/Transforms/IPO/CalledValuePropagation.cpp new file mode 100644 index 00000000000..2d2701c5899 --- /dev/null +++ b/lib/Transforms/IPO/CalledValuePropagation.cpp @@ -0,0 +1,422 @@ +//===- CalledValuePropagation.cpp - Propagate called values -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a transformation that attaches !callees metadata to +// indirect call sites. For a given call site, the metadata, if present, +// indicates the set of functions the call site could possibly target at +// run-time. This metadata is added to indirect call sites when the set of +// possible targets can be determined by analysis and is known to be small. The +// analysis driving the transformation is similar to constant propagation and +// makes uses of the generic sparse propagation solver. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/CalledValuePropagation.h" +#include "llvm/Analysis/SparsePropagation.h" +#include "llvm/Analysis/ValueLatticeUtils.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/Transforms/IPO.h" +using namespace llvm; + +#define DEBUG_TYPE "called-value-propagation" + +/// The maximum number of functions to track per lattice value. Once the number +/// of functions a call site can possibly target exceeds this threshold, it's +/// lattice value becomes overdefined. The number of possible lattice values is +/// bounded by Ch(F, M), where F is the number of functions in the module and M +/// is MaxFunctionsPerValue. As such, this value should be kept very small. We +/// likely can't do anything useful for call sites with a large number of +/// possible targets, anyway. +static cl::opt<unsigned> MaxFunctionsPerValue( + "cvp-max-functions-per-value", cl::Hidden, cl::init(4), + cl::desc("The maximum number of functions to track per lattice value")); + +namespace { +/// To enable interprocedural analysis, we assign LLVM values to the following +/// groups. The register group represents SSA registers, the return group +/// represents the return values of functions, and the memory group represents +/// in-memory values. An LLVM Value can technically be in more than one group. +/// It's necessary to distinguish these groups so we can, for example, track a +/// global variable separately from the value stored at its location. +enum class IPOGrouping { Register, Return, Memory }; + +/// Our LatticeKeys are PointerIntPairs composed of LLVM values and groupings. +using CVPLatticeKey = PointerIntPair<Value *, 2, IPOGrouping>; + +/// The lattice value type used by our custom lattice function. It holds the +/// lattice state, and a set of functions. +class CVPLatticeVal { +public: + /// The states of the lattice values. Only the FunctionSet state is + /// interesting. It indicates the set of functions to which an LLVM value may + /// refer. + enum CVPLatticeStateTy { Undefined, FunctionSet, Overdefined, Untracked }; + + /// Comparator for sorting the functions set. We want to keep the order + /// deterministic for testing, etc. + struct Compare { + bool operator()(const Function *LHS, const Function *RHS) const { + return LHS->getName() < RHS->getName(); + } + }; + + CVPLatticeVal() : LatticeState(Undefined) {} + CVPLatticeVal(CVPLatticeStateTy LatticeState) : LatticeState(LatticeState) {} + CVPLatticeVal(std::set<Function *, Compare> &&Functions) + : LatticeState(FunctionSet), Functions(Functions) {} + + /// Get a reference to the functions held by this lattice value. The number + /// of functions will be zero for states other than FunctionSet. + const std::set<Function *, Compare> &getFunctions() const { + return Functions; + } + + /// Returns true if the lattice value is in the FunctionSet state. + bool isFunctionSet() const { return LatticeState == FunctionSet; } + + bool operator==(const CVPLatticeVal &RHS) const { + return LatticeState == RHS.LatticeState && Functions == RHS.Functions; + } + + bool operator!=(const CVPLatticeVal &RHS) const { + return LatticeState != RHS.LatticeState || Functions != RHS.Functions; + } + +private: + /// Holds the state this lattice value is in. + CVPLatticeStateTy LatticeState; + + /// Holds functions indicating the possible targets of call sites. This set + /// is empty for lattice values in the undefined, overdefined, and untracked + /// states. The maximum size of the set is controlled by + /// MaxFunctionsPerValue. Since most LLVM values are expected to be in + /// uninteresting states (i.e., overdefined), CVPLatticeVal objects should be + /// small and efficiently copyable. + std::set<Function *, Compare> Functions; +}; + +/// The custom lattice function used by the generic sparse propagation solver. +/// It handles merging lattice values and computing new lattice values for +/// constants, arguments, values returned from trackable functions, and values +/// located in trackable global variables. It also computes the lattice values +/// that change as a result of executing instructions. +class CVPLatticeFunc + : public AbstractLatticeFunction<CVPLatticeKey, CVPLatticeVal> { +public: + CVPLatticeFunc() + : AbstractLatticeFunction(CVPLatticeVal(CVPLatticeVal::Undefined), + CVPLatticeVal(CVPLatticeVal::Overdefined), + CVPLatticeVal(CVPLatticeVal::Untracked)) {} + + /// Compute and return a CVPLatticeVal for the given CVPLatticeKey. + CVPLatticeVal ComputeLatticeVal(CVPLatticeKey Key) override { + switch (Key.getInt()) { + case IPOGrouping::Register: + if (isa<Instruction>(Key.getPointer())) { + return getUndefVal(); + } else if (auto *A = dyn_cast<Argument>(Key.getPointer())) { + if (canTrackArgumentsInterprocedurally(A->getParent())) + return getUndefVal(); + } else if (auto *C = dyn_cast<Constant>(Key.getPointer())) { + return computeConstant(C); + } + return getOverdefinedVal(); + case IPOGrouping::Memory: + case IPOGrouping::Return: + if (auto *GV = dyn_cast<GlobalVariable>(Key.getPointer())) { + if (canTrackGlobalVariableInterprocedurally(GV)) + return computeConstant(GV->getInitializer()); + } else if (auto *F = cast<Function>(Key.getPointer())) + if (canTrackReturnsInterprocedurally(F)) + return getUndefVal(); + } + return getOverdefinedVal(); + } + + /// Merge the two given lattice values. The interesting cases are merging two + /// FunctionSet values and a FunctionSet value with an Undefined value. For + /// these cases, we simply union the function sets. If the size of the union + /// is greater than the maximum functions we track, the merged value is + /// overdefined. + CVPLatticeVal MergeValues(CVPLatticeVal X, CVPLatticeVal Y) override { + if (X == getOverdefinedVal() || Y == getOverdefinedVal()) + return getOverdefinedVal(); + if (X == getUndefVal() && Y == getUndefVal()) + return getUndefVal(); + std::set<Function *, CVPLatticeVal::Compare> Union; + std::set_union(X.getFunctions().begin(), X.getFunctions().end(), + Y.getFunctions().begin(), Y.getFunctions().end(), + std::inserter(Union, Union.begin())); + if (Union.size() > MaxFunctionsPerValue) + return getOverdefinedVal(); + return CVPLatticeVal(std::move(Union)); + } + + /// Compute the lattice values that change as a result of executing the given + /// instruction. The changed values are stored in \p ChangedValues. We handle + /// just a few kinds of instructions since we're only propagating values that + /// can be called. + void ComputeInstructionState( + Instruction &I, DenseMap<CVPLatticeKey, CVPLatticeVal> &ChangedValues, + SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) override { + switch (I.getOpcode()) { + case Instruction::Call: + return visitCallSite(cast<CallInst>(&I), ChangedValues, SS); + case Instruction::Invoke: + return visitCallSite(cast<InvokeInst>(&I), ChangedValues, SS); + case Instruction::Load: + return visitLoad(*cast<LoadInst>(&I), ChangedValues, SS); + case Instruction::Ret: + return visitReturn(*cast<ReturnInst>(&I), ChangedValues, SS); + case Instruction::Select: + return visitSelect(*cast<SelectInst>(&I), ChangedValues, SS); + case Instruction::Store: + return visitStore(*cast<StoreInst>(&I), ChangedValues, SS); + default: + return visitInst(I, ChangedValues, SS); + } + } + + /// Print the given CVPLatticeVal to the specified stream. + void PrintLatticeVal(CVPLatticeVal LV, raw_ostream &OS) override { + if (LV == getUndefVal()) + OS << "Undefined "; + else if (LV == getOverdefinedVal()) + OS << "Overdefined"; + else if (LV == getUntrackedVal()) + OS << "Untracked "; + else + OS << "FunctionSet"; + } + + /// Print the given CVPLatticeKey to the specified stream. + void PrintLatticeKey(CVPLatticeKey Key, raw_ostream &OS) override { + if (Key.getInt() == IPOGrouping::Register) + OS << "<reg> "; + else if (Key.getInt() == IPOGrouping::Memory) + OS << "<mem> "; + else if (Key.getInt() == IPOGrouping::Return) + OS << "<ret> "; + if (isa<Function>(Key.getPointer())) + OS << Key.getPointer()->getName(); + else + OS << *Key.getPointer(); + } + + /// We collect a set of indirect calls when visiting call sites. This method + /// returns a reference to that set. + SmallPtrSetImpl<Instruction *> &getIndirectCalls() { return IndirectCalls; } + +private: + /// Holds the indirect calls we encounter during the analysis. We will attach + /// metadata to these calls after the analysis indicating the functions the + /// calls can possibly target. + SmallPtrSet<Instruction *, 32> IndirectCalls; + + /// Compute a new lattice value for the given constant. The constant, after + /// stripping any pointer casts, should be a Function. We ignore null + /// pointers as an optimization, since calling these values is undefined + /// behavior. + CVPLatticeVal computeConstant(Constant *C) { + if (isa<ConstantPointerNull>(C)) + return CVPLatticeVal(CVPLatticeVal::FunctionSet); + if (auto *F = dyn_cast<Function>(C->stripPointerCasts())) + return CVPLatticeVal({F}); + return getOverdefinedVal(); + } + + /// Handle return instructions. The function's return state is the merge of + /// the returned value state and the function's return state. + void visitReturn(ReturnInst &I, + DenseMap<CVPLatticeKey, CVPLatticeVal> &ChangedValues, + SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) { + Function *F = I.getParent()->getParent(); + if (F->getReturnType()->isVoidTy()) + return; + auto RegI = CVPLatticeKey(I.getReturnValue(), IPOGrouping::Register); + auto RetF = CVPLatticeKey(F, IPOGrouping::Return); + ChangedValues[RetF] = + MergeValues(SS.getValueState(RegI), SS.getValueState(RetF)); + } + + /// Handle call sites. The state of a called function's formal arguments is + /// the merge of the argument state with the call sites corresponding actual + /// argument state. The call site state is the merge of the call site state + /// with the returned value state of the called function. + void visitCallSite(CallSite CS, + DenseMap<CVPLatticeKey, CVPLatticeVal> &ChangedValues, + SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) { + Function *F = CS.getCalledFunction(); + Instruction *I = CS.getInstruction(); + auto RegI = CVPLatticeKey(I, IPOGrouping::Register); + + // If this is an indirect call, save it so we can quickly revisit it when + // attaching metadata. + if (!F) + IndirectCalls.insert(I); + + // If we can't track the function's return values, there's nothing to do. + if (!F || !canTrackReturnsInterprocedurally(F)) { + ChangedValues[RegI] = getOverdefinedVal(); + return; + } + + // Inform the solver that the called function is executable, and perform + // the merges for the arguments and return value. + SS.MarkBlockExecutable(&F->front()); + auto RetF = CVPLatticeKey(F, IPOGrouping::Return); + for (Argument &A : F->args()) { + auto RegFormal = CVPLatticeKey(&A, IPOGrouping::Register); + auto RegActual = + CVPLatticeKey(CS.getArgument(A.getArgNo()), IPOGrouping::Register); + ChangedValues[RegFormal] = + MergeValues(SS.getValueState(RegFormal), SS.getValueState(RegActual)); + } + ChangedValues[RegI] = + MergeValues(SS.getValueState(RegI), SS.getValueState(RetF)); + } + + /// Handle select instructions. The select instruction state is the merge the + /// true and false value states. + void visitSelect(SelectInst &I, + DenseMap<CVPLatticeKey, CVPLatticeVal> &ChangedValues, + SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) { + auto RegI = CVPLatticeKey(&I, IPOGrouping::Register); + auto RegT = CVPLatticeKey(I.getTrueValue(), IPOGrouping::Register); + auto RegF = CVPLatticeKey(I.getFalseValue(), IPOGrouping::Register); + ChangedValues[RegI] = + MergeValues(SS.getValueState(RegT), SS.getValueState(RegF)); + } + + /// Handle load instructions. If the pointer operand of the load is a global + /// variable, we attempt to track the value. The loaded value state is the + /// merge of the loaded value state with the global variable state. + void visitLoad(LoadInst &I, + DenseMap<CVPLatticeKey, CVPLatticeVal> &ChangedValues, + SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) { + auto RegI = CVPLatticeKey(&I, IPOGrouping::Register); + if (auto *GV = dyn_cast<GlobalVariable>(I.getPointerOperand())) { + auto MemGV = CVPLatticeKey(GV, IPOGrouping::Memory); + ChangedValues[RegI] = + MergeValues(SS.getValueState(RegI), SS.getValueState(MemGV)); + } else { + ChangedValues[RegI] = getOverdefinedVal(); + } + } + + /// Handle store instructions. If the pointer operand of the store is a + /// global variable, we attempt to track the value. The global variable state + /// is the merge of the stored value state with the global variable state. + void visitStore(StoreInst &I, + DenseMap<CVPLatticeKey, CVPLatticeVal> &ChangedValues, + SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) { + auto *GV = dyn_cast<GlobalVariable>(I.getPointerOperand()); + if (!GV) + return; + auto RegI = CVPLatticeKey(I.getValueOperand(), IPOGrouping::Register); + auto MemGV = CVPLatticeKey(GV, IPOGrouping::Memory); + ChangedValues[MemGV] = + MergeValues(SS.getValueState(RegI), SS.getValueState(MemGV)); + } + + /// Handle all other instructions. All other instructions are marked + /// overdefined. + void visitInst(Instruction &I, + DenseMap<CVPLatticeKey, CVPLatticeVal> &ChangedValues, + SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) { + auto RegI = CVPLatticeKey(&I, IPOGrouping::Register); + ChangedValues[RegI] = getOverdefinedVal(); + } +}; +} // namespace + +namespace llvm { +/// A specialization of LatticeKeyInfo for CVPLatticeKeys. The generic solver +/// must translate between LatticeKeys and LLVM Values when adding Values to +/// its work list and inspecting the state of control-flow related values. +template <> struct LatticeKeyInfo<CVPLatticeKey> { + static inline Value *getValueFromLatticeKey(CVPLatticeKey Key) { + return Key.getPointer(); + } + static inline CVPLatticeKey getLatticeKeyFromValue(Value *V) { + return CVPLatticeKey(V, IPOGrouping::Register); + } +}; +} // namespace llvm + +static bool runCVP(Module &M) { + // Our custom lattice function and generic sparse propagation solver. + CVPLatticeFunc Lattice; + SparseSolver<CVPLatticeKey, CVPLatticeVal> Solver(&Lattice); + + // For each function in the module, if we can't track its arguments, let the + // generic solver assume it is executable. + for (Function &F : M) + if (!F.isDeclaration() && !canTrackArgumentsInterprocedurally(&F)) + Solver.MarkBlockExecutable(&F.front()); + + // Solver our custom lattice. In doing so, we will also build a set of + // indirect call sites. + Solver.Solve(); + + // Attach metadata to the indirect call sites that were collected indicating + // the set of functions they can possibly target. + bool Changed = false; + MDBuilder MDB(M.getContext()); + for (Instruction *C : Lattice.getIndirectCalls()) { + CallSite CS(C); + auto RegI = CVPLatticeKey(CS.getCalledValue(), IPOGrouping::Register); + CVPLatticeVal LV = Solver.getExistingValueState(RegI); + if (!LV.isFunctionSet() || LV.getFunctions().empty()) + continue; + MDNode *Callees = MDB.createCallees(SmallVector<Function *, 4>( + LV.getFunctions().begin(), LV.getFunctions().end())); + C->setMetadata(LLVMContext::MD_callees, Callees); + Changed = true; + } + + return Changed; +} + +PreservedAnalyses CalledValuePropagationPass::run(Module &M, + ModuleAnalysisManager &) { + runCVP(M); + return PreservedAnalyses::all(); +} + +namespace { +class CalledValuePropagationLegacyPass : public ModulePass { +public: + static char ID; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + + CalledValuePropagationLegacyPass() : ModulePass(ID) { + initializeCalledValuePropagationLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override { + if (skipModule(M)) + return false; + return runCVP(M); + } +}; +} // namespace + +char CalledValuePropagationLegacyPass::ID = 0; +INITIALIZE_PASS(CalledValuePropagationLegacyPass, "called-value-propagation", + "Called Value Propagation", false, false) + +ModulePass *llvm::createCalledValuePropagationPass() { + return new CalledValuePropagationLegacyPass(); +} diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp index 5bb305ca84d..d5d35ee89e0 100644 --- a/lib/Transforms/IPO/IPO.cpp +++ b/lib/Transforms/IPO/IPO.cpp @@ -25,6 +25,7 @@ using namespace llvm; void llvm::initializeIPO(PassRegistry &Registry) { initializeArgPromotionPass(Registry); + initializeCalledValuePropagationLegacyPassPass(Registry); initializeConstantMergeLegacyPassPass(Registry); initializeCrossDSOCFIPass(Registry); initializeDAEPass(Registry); @@ -67,6 +68,10 @@ void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createArgumentPromotionPass()); } +void LLVMAddCalledValuePropagationPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createCalledValuePropagationPass()); +} + void LLVMAddConstantMergePass(LLVMPassManagerRef PM) { unwrap(PM)->add(createConstantMergePass()); } diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 4fa780fa22c..35ca107c325 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -460,6 +460,7 @@ void PassManagerBuilder::populateModulePassManager( addExtensionsToPM(EP_ModuleOptimizerEarly, MPM); MPM.add(createIPSCCPPass()); // IP SCCP + MPM.add(createCalledValuePropagationPass()); MPM.add(createGlobalOptimizerPass()); // Optimize out global vars // Promote any localized global vars. MPM.add(createPromoteMemoryToRegisterPass()); @@ -703,6 +704,10 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // opens opportunities for globalopt (and inlining) by substituting function // pointers passed as arguments to direct uses of functions. PM.add(createIPSCCPPass()); + + // Attach metadata to indirect call sites indicating the set of functions + // they may target at run-time. This should follow IPSCCP. + PM.add(createCalledValuePropagationPass()); } // Infer attributes about definitions. The readnone attribute in particular is diff --git a/test/Other/new-pm-defaults.ll b/test/Other/new-pm-defaults.ll index 6e5c679e3d9..816f75310e3 100644 --- a/test/Other/new-pm-defaults.ll +++ b/test/Other/new-pm-defaults.ll @@ -78,6 +78,7 @@ ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. ; CHECK-O-NEXT: Running pass: IPSCCPPass +; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass ; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PromotePass> ; CHECK-O-NEXT: Running pass: DeadArgumentEliminationPass diff --git a/test/Other/new-pm-lto-defaults.ll b/test/Other/new-pm-lto-defaults.ll index e450a8eeb3b..fc52f70ff4c 100644 --- a/test/Other/new-pm-lto-defaults.ll +++ b/test/Other/new-pm-lto-defaults.ll @@ -34,6 +34,7 @@ ; CHECK-O2-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Function ; CHECK-O2-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis ; CHECK-O2-NEXT: Running pass: IPSCCPPass +; CHECK-O2-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}PostOrderFunctionAttrsPass> ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC ; CHECK-O1-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Function diff --git a/test/Other/new-pm-thinlto-defaults.ll b/test/Other/new-pm-thinlto-defaults.ll index 47074adbcb2..7d40ef3eea2 100644 --- a/test/Other/new-pm-thinlto-defaults.ll +++ b/test/Other/new-pm-thinlto-defaults.ll @@ -74,6 +74,7 @@ ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. ; CHECK-O-NEXT: Running pass: IPSCCPPass +; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass ; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PromotePass> ; CHECK-O-NEXT: Running pass: DeadArgumentEliminationPass diff --git a/test/Transforms/CalledValuePropagation/simple-arguments.ll b/test/Transforms/CalledValuePropagation/simple-arguments.ll new file mode 100644 index 00000000000..34274f3b348 --- /dev/null +++ b/test/Transforms/CalledValuePropagation/simple-arguments.ll @@ -0,0 +1,83 @@ +; RUN: opt -called-value-propagation -S < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnueabi" + + +; This test checks that we propagate the functions through arguments and attach +; !callees metadata to the call. Such metadata can enable optimizations of this +; code sequence. +; +; For example, the code below a illustrates a contrived sort-like algorithm +; that accepts a pointer to a comparison function. Since the indirect call to +; the comparison function has only two targets, the call can be promoted to two +; direct calls using an if-then-else. The loop can then be unswitched and the +; called functions inlined. This essentially produces two loops, once +; specialized for each comparison. +; +; CHECK: %tmp3 = call i1 %cmp(i64* %tmp1, i64* %tmp2), !callees ![[MD:[0-9]+]] +; CHECK: ![[MD]] = !{i1 (i64*, i64*)* @ugt, i1 (i64*, i64*)* @ule} +; +define void @test_argument(i64* %x, i64 %n, i1 %flag) { +entry: + %tmp0 = sub i64 %n, 1 + br i1 %flag, label %then, label %else + +then: + call void @arrange_data(i64* %x, i64 %tmp0, i1 (i64*, i64*)* @ugt) + br label %merge + +else: + call void @arrange_data(i64* %x, i64 %tmp0, i1 (i64*, i64*)* @ule) + br label %merge + +merge: + ret void +} + +define internal void @arrange_data(i64* %x, i64 %n, i1 (i64*, i64*)* %cmp) { +entry: + %tmp0 = icmp eq i64 %n, 1 + br i1 %tmp0, label %merge, label %for.body + +for.body: + %i = phi i64 [ 0, %entry ], [ %i.next, %cmp.false ] + %i.next = add nuw nsw i64 %i, 1 + %tmp1 = getelementptr inbounds i64, i64* %x, i64 %i + %tmp2 = getelementptr inbounds i64, i64* %x, i64 %i.next + %tmp3 = call i1 %cmp(i64* %tmp1, i64* %tmp2) + br i1 %tmp3, label %cmp.true, label %cmp.false + +cmp.true: + call void @swap(i64* %tmp1, i64* %tmp2) + br label %cmp.false + +cmp.false: + %cond = icmp slt i64 %i.next, %n + br i1 %cond, label %for.body, label %for.end + +for.end: + %tmp4 = sub i64 %n, 1 + call void @arrange_data(i64* %x, i64 %tmp4, i1 (i64*, i64*)* %cmp) + br label %merge + +merge: + ret void +} + +define internal i1 @ugt(i64* %a, i64* %b) { +entry: + %tmp0 = load i64, i64* %a + %tmp1 = load i64, i64* %b + %tmp2 = icmp ugt i64 %tmp0, %tmp1 + ret i1 %tmp2 +} + +define internal i1 @ule(i64* %a, i64* %b) { +entry: + %tmp0 = load i64, i64* %a + %tmp1 = load i64, i64* %b + %tmp2 = icmp ule i64 %tmp0, %tmp1 + ret i1 %tmp2 +} + +declare void @swap(i64*, i64*) diff --git a/test/Transforms/CalledValuePropagation/simple-memory.ll b/test/Transforms/CalledValuePropagation/simple-memory.ll new file mode 100644 index 00000000000..e42f10c1436 --- /dev/null +++ b/test/Transforms/CalledValuePropagation/simple-memory.ll @@ -0,0 +1,62 @@ +; RUN: opt -called-value-propagation -S < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnueabi" + +@global_function = internal unnamed_addr global void ()* null, align 8 +@global_array = common unnamed_addr global i64* null, align 8 + +; This test checks that we propagate the functions through an internal global +; variable, and attach !callees metadata to the call. Such metadata can enable +; optimizations of this code sequence. +; +; For example, since both of the targeted functions have the "nounwind" and +; "readnone" function attributes, LICM can be made to move the call and the +; function pointer load outside the loop. This would then enable the loop +; vectorizer to vectorize the sum reduction. +; +; CHECK: call void %tmp0(), !callees ![[MD:[0-9]+]] +; CHECK: ![[MD]] = !{void ()* @invariant_1, void ()* @invariant_2} +; +define i64 @test_memory_entry(i64 %n, i1 %flag) { +entry: + br i1 %flag, label %then, label %else + +then: + store void ()* @invariant_1, void ()** @global_function + br label %merge + +else: + store void ()* @invariant_2, void ()** @global_function + br label %merge + +merge: + %tmp1 = call i64 @test_memory(i64 %n) + ret i64 %tmp1 +} + +define internal i64 @test_memory(i64 %n) { +entry: + %array = load i64*, i64** @global_array + br label %for.body + +for.body: + %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] + %r = phi i64 [ 0, %entry ], [ %tmp3, %for.body ] + %tmp0 = load void ()*, void ()** @global_function + call void %tmp0() + %tmp1 = getelementptr inbounds i64, i64* %array, i64 %i + %tmp2 = load i64, i64* %tmp1 + %tmp3 = add i64 %tmp2, %r + %i.next = add nuw nsw i64 %i, 1 + %cond = icmp slt i64 %i.next, %n + br i1 %cond, label %for.body, label %for.end + +for.end: + %tmp4 = phi i64 [ %tmp3, %for.body ] + ret i64 %tmp4 +} + +declare void @invariant_1() #0 +declare void @invariant_2() #0 + +attributes #0 = { nounwind readnone } diff --git a/test/Transforms/CalledValuePropagation/simple-select.ll b/test/Transforms/CalledValuePropagation/simple-select.ll new file mode 100644 index 00000000000..3d6c7dad7c8 --- /dev/null +++ b/test/Transforms/CalledValuePropagation/simple-select.ll @@ -0,0 +1,39 @@ +; RUN: opt -called-value-propagation -S < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnueabi" + +@global_function = internal unnamed_addr global void ()* null, align 8 +@global_scalar = internal unnamed_addr global i64 zeroinitializer + +; This test checks that we propagate the functions through a select +; instruction, and attach !callees metadata to the call. Such metadata can +; enable optimizations of this code sequence. +; +; For example, since both of the targeted functions have the "norecurse" +; attribute, the function attributes pass can be made to infer that +; "@test_select" is also norecurse. This would allow the globals optimizer to +; localize "@global_scalar". The function could then be further simplified to +; always return the constant "1", eliminating the load and store instructions. +; +; CHECK: call void %tmp0(), !callees ![[MD:[0-9]+]] +; CHECK: ![[MD]] = !{void ()* @norecurse_1, void ()* @norecurse_2} +; +define i64 @test_select_entry(i1 %flag) { +entry: + %tmp0 = call i64 @test_select(i1 %flag) + ret i64 %tmp0 +} + +define internal i64 @test_select(i1 %flag) { +entry: + %tmp0 = select i1 %flag, void ()* @norecurse_1, void ()* @norecurse_2 + store i64 1, i64* @global_scalar + call void %tmp0() + %tmp1 = load i64, i64* @global_scalar + ret i64 %tmp1 +} + +declare void @norecurse_1() #0 +declare void @norecurse_2() #0 + +attributes #0 = { norecurse } -- 2.40.0