From 8847c18e07e5805d4bf63647f77f4b5ff66e22b7 Mon Sep 17 00:00:00 2001 From: Sean Silva Date: Wed, 15 Jun 2016 08:43:40 +0000 Subject: [PATCH] [PM] Port SLPVectorizer to the new PM This uses the "runImpl" approach to share code with the old PM. Porting to the new PM meant abandoning the anonymous namespace enclosing most of SLPVectorizer.cpp which is a bit of a bummer (but not a big deal compared to having to pull the pass class into a header which the new PM requires since it calls the constructor directly). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@272766 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../llvm/Transforms/Scalar/SLPVectorizer.h | 113 ++++++++ lib/Passes/PassBuilder.cpp | 1 + lib/Passes/PassRegistry.def | 1 + lib/Transforms/Vectorize/SLPVectorizer.cpp | 251 ++++++++---------- test/Transforms/SLPVectorizer/X86/gep.ll | 1 + 5 files changed, 229 insertions(+), 138 deletions(-) create mode 100644 include/llvm/Transforms/Scalar/SLPVectorizer.h diff --git a/include/llvm/Transforms/Scalar/SLPVectorizer.h b/include/llvm/Transforms/Scalar/SLPVectorizer.h new file mode 100644 index 00000000000..0f15d5663ce --- /dev/null +++ b/include/llvm/Transforms/Scalar/SLPVectorizer.h @@ -0,0 +1,113 @@ +//===---- SLPVectorizer.h ---------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This pass implements the Bottom Up SLP vectorizer. It detects consecutive +// stores that can be put together into vector-stores. Next, it attempts to +// construct vectorizable tree using the use-def chains. If a profitable tree +// was found, the SLP vectorizer performs vectorization on the tree. +// +// The pass is inspired by the work described in the paper: +// "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_SLPVECTORIZER_H +#define LLVM_TRANSFORMS_SCALAR_SLPVECTORIZER_H + +#include "llvm/ADT/MapVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/DemandedBits.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +/// A private "module" namespace for types and utilities used by this pass. +/// These are implementation details and should not be used by clients. +namespace slpvectorizer { +class BoUpSLP; +} + +struct SLPVectorizerPass : public PassInfoMixin { + typedef SmallVector StoreList; + typedef MapVector StoreListMap; + typedef SmallVector WeakVHList; + typedef MapVector WeakVHListMap; + + ScalarEvolution *SE = nullptr; + TargetTransformInfo *TTI = nullptr; + TargetLibraryInfo *TLI = nullptr; + AliasAnalysis *AA = nullptr; + LoopInfo *LI = nullptr; + DominatorTree *DT = nullptr; + AssumptionCache *AC = nullptr; + DemandedBits *DB = nullptr; + const DataLayout *DL = nullptr; + +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + // Glue for old PM. + bool runImpl(Function &F, ScalarEvolution *SE_, TargetTransformInfo *TTI_, + TargetLibraryInfo *TLI_, AliasAnalysis *AA_, LoopInfo *LI_, + DominatorTree *DT_, AssumptionCache *AC_, DemandedBits *DB_); + +private: + /// \brief Collect store and getelementptr instructions and organize them + /// according to the underlying object of their pointer operands. We sort the + /// instructions by their underlying objects to reduce the cost of + /// consecutive access queries. + /// + /// TODO: We can further reduce this cost if we flush the chain creation + /// every time we run into a memory barrier. + void collectSeedInstructions(BasicBlock *BB); + + /// \brief Try to vectorize a chain that starts at two arithmetic instrs. + bool tryToVectorizePair(Value *A, Value *B, slpvectorizer::BoUpSLP &R); + + /// \brief Try to vectorize a list of operands. + /// \@param BuildVector A list of users to ignore for the purpose of + /// scheduling and that don't need extracting. + /// \returns true if a value was vectorized. + bool tryToVectorizeList(ArrayRef VL, slpvectorizer::BoUpSLP &R, + ArrayRef BuildVector = None, + bool allowReorder = false); + + /// \brief Try to vectorize a chain that may start at the operands of \V; + bool tryToVectorize(BinaryOperator *V, slpvectorizer::BoUpSLP &R); + + /// \brief Vectorize the store instructions collected in Stores. + bool vectorizeStoreChains(slpvectorizer::BoUpSLP &R); + + /// \brief Vectorize the index computations of the getelementptr instructions + /// collected in GEPs. + bool vectorizeGEPIndices(BasicBlock *BB, slpvectorizer::BoUpSLP &R); + + /// \brief Scan the basic block and look for patterns that are likely to start + /// a vectorization chain. + bool vectorizeChainsInBlock(BasicBlock *BB, slpvectorizer::BoUpSLP &R); + + bool vectorizeStoreChain(ArrayRef Chain, int CostThreshold, + slpvectorizer::BoUpSLP &R, unsigned VecRegSize); + + bool vectorizeStores(ArrayRef Stores, int costThreshold, + slpvectorizer::BoUpSLP &R); + + /// The store instructions in a basic block organized by base pointer. + StoreListMap Stores; + + /// The getelementptr instructions in a basic block organized by base pointer. + WeakVHListMap GEPs; +}; +} + +#endif // LLVM_TRANSFORMS_SCALAR_SLPVECTORIZER_H diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp index cc0ae02eb26..a38b0dd6aa2 100644 --- a/lib/Passes/PassBuilder.cpp +++ b/lib/Passes/PassBuilder.cpp @@ -90,6 +90,7 @@ #include "llvm/Transforms/Scalar/SROA.h" #include "llvm/Transforms/Scalar/SimplifyCFG.h" #include "llvm/Transforms/Scalar/Sink.h" +#include "llvm/Transforms/Scalar/SLPVectorizer.h" #include "llvm/Transforms/Utils/LCSSA.h" #include "llvm/Transforms/Utils/Mem2Reg.h" #include "llvm/Transforms/Utils/MemorySSA.h" diff --git a/lib/Passes/PassRegistry.def b/lib/Passes/PassRegistry.def index 844bb843a7a..b36699cdd1e 100644 --- a/lib/Passes/PassRegistry.def +++ b/lib/Passes/PassRegistry.def @@ -153,6 +153,7 @@ FUNCTION_PASS("reassociate", ReassociatePass()) FUNCTION_PASS("sccp", SCCPPass()) FUNCTION_PASS("simplify-cfg", SimplifyCFGPass()) FUNCTION_PASS("sink", SinkingPass()) +FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass()) FUNCTION_PASS("sroa", SROA()) FUNCTION_PASS("verify", VerifierPass()) FUNCTION_PASS("verify", DominatorTreeVerifierPass()) diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 66d7fc8f1f4..d58017474d4 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -15,22 +15,16 @@ // "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks. // //===----------------------------------------------------------------------===// -#include "llvm/ADT/MapVector.h" +#include "llvm/Transforms/Scalar/SLPVectorizer.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" -#include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopAccessAnalysis.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/DataLayout.h" @@ -52,6 +46,7 @@ #include using namespace llvm; +using namespace slpvectorizer; #define SV_NAME "slp-vectorizer" #define DEBUG_TYPE "SLP" @@ -88,8 +83,6 @@ static cl::opt MinVectorRegSizeOption( "slp-min-reg-size", cl::init(128), cl::Hidden, cl::desc("Attempt to vectorize for this register size in bits")); -namespace { - // FIXME: Set this via cl::opt to allow overriding. static const unsigned RecursionMaxDepth = 12; @@ -338,6 +331,8 @@ static bool isSimple(Instruction *I) { return true; } +namespace llvm { +namespace slpvectorizer { /// Bottom Up SLP Vectorizer. class BoUpSLP { public: @@ -947,9 +942,12 @@ private: /// can legally be represented. MapVector MinBWs; }; +} // end namespace llvm +} // end namespace slpvectorizer #ifndef NDEBUG -raw_ostream &operator<<(raw_ostream &os, const BoUpSLP::ScheduleData &SD) { +raw_ostream &llvm::slpvectorizer::operator<<(raw_ostream &os, + const BoUpSLP::ScheduleData &SD) { SD.dump(os); return os; } @@ -3479,10 +3477,7 @@ void BoUpSLP::computeMinimumValueSizes() { /// The SLPVectorizer Pass. struct SLPVectorizer : public FunctionPass { - typedef SmallVector StoreList; - typedef MapVector StoreListMap; - typedef SmallVector WeakVHList; - typedef MapVector WeakVHListMap; + SLPVectorizerPass Impl; /// Pass identification, replacement for typeid static char ID; @@ -3491,18 +3486,8 @@ struct SLPVectorizer : public FunctionPass { initializeSLPVectorizerPass(*PassRegistry::getPassRegistry()); } - ScalarEvolution *SE; - TargetTransformInfo *TTI; - TargetLibraryInfo *TLI; - AliasAnalysis *AA; - LoopInfo *LI; - DominatorTree *DT; - AssumptionCache *AC; - DemandedBits *DB; - const DataLayout *DL; bool doInitialization(Module &M) override { - DL = &M.getDataLayout(); return false; } @@ -3510,68 +3495,17 @@ struct SLPVectorizer : public FunctionPass { if (skipFunction(F)) return false; - SE = &getAnalysis().getSE(); - TTI = &getAnalysis().getTTI(F); + auto *SE = &getAnalysis().getSE(); + auto *TTI = &getAnalysis().getTTI(F); auto *TLIP = getAnalysisIfAvailable(); - TLI = TLIP ? &TLIP->getTLI() : nullptr; - AA = &getAnalysis().getAAResults(); - LI = &getAnalysis().getLoopInfo(); - DT = &getAnalysis().getDomTree(); - AC = &getAnalysis().getAssumptionCache(F); - DB = &getAnalysis().getDemandedBits(); - - Stores.clear(); - GEPs.clear(); - bool Changed = false; - - // If the target claims to have no vector registers don't attempt - // vectorization. - if (!TTI->getNumberOfRegisters(true)) - return false; - - // Don't vectorize when the attribute NoImplicitFloat is used. - if (F.hasFnAttribute(Attribute::NoImplicitFloat)) - return false; - - DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n"); - - // Use the bottom up slp vectorizer to construct chains that start with - // store instructions. - BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC, DB, DL); - - // A general note: the vectorizer must use BoUpSLP::eraseInstruction() to - // delete instructions. - - // Scan the blocks in the function in post order. - for (auto BB : post_order(&F.getEntryBlock())) { - collectSeedInstructions(BB); - - // Vectorize trees that end at stores. - if (!Stores.empty()) { - DEBUG(dbgs() << "SLP: Found stores for " << Stores.size() - << " underlying objects.\n"); - Changed |= vectorizeStoreChains(R); - } - - // Vectorize trees that end at reductions. - Changed |= vectorizeChainsInBlock(BB, R); - - // Vectorize the index computations of getelementptr instructions. This - // is primarily intended to catch gather-like idioms ending at - // non-consecutive loads. - if (!GEPs.empty()) { - DEBUG(dbgs() << "SLP: Found GEPs for " << GEPs.size() - << " underlying objects.\n"); - Changed |= vectorizeGEPIndices(BB, R); - } - } - - if (Changed) { - R.optimizeGatherSequence(); - DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n"); - DEBUG(verifyFunction(F)); - } - return Changed; + auto *TLI = TLIP ? &TLIP->getTLI() : nullptr; + auto *AA = &getAnalysis().getAAResults(); + auto *LI = &getAnalysis().getLoopInfo(); + auto *DT = &getAnalysis().getDomTree(); + auto *AC = &getAnalysis().getAssumptionCache(F); + auto *DB = &getAnalysis().getDemandedBits(); + + return Impl.runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -3589,54 +3523,97 @@ struct SLPVectorizer : public FunctionPass { AU.addPreserved(); AU.setPreservesCFG(); } +}; -private: - /// \brief Collect store and getelementptr instructions and organize them - /// according to the underlying object of their pointer operands. We sort the - /// instructions by their underlying objects to reduce the cost of - /// consecutive access queries. - /// - /// TODO: We can further reduce this cost if we flush the chain creation - /// every time we run into a memory barrier. - void collectSeedInstructions(BasicBlock *BB); +PreservedAnalyses SLPVectorizerPass::run(Function &F, FunctionAnalysisManager &AM) { + auto *SE = &AM.getResult(F); + auto *TTI = &AM.getResult(F); + auto *TLI = AM.getCachedResult(F); + auto *AA = &AM.getResult(F); + auto *LI = &AM.getResult(F); + auto *DT = &AM.getResult(F); + auto *AC = &AM.getResult(F); + auto *DB = &AM.getResult(F); + + bool Changed = runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB); + if (!Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve(); + PA.preserve(); + PA.preserve(); + PA.preserve(); + return PA; +} - /// \brief Try to vectorize a chain that starts at two arithmetic instrs. - bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R); +bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, + TargetTransformInfo *TTI_, + TargetLibraryInfo *TLI_, AliasAnalysis *AA_, + LoopInfo *LI_, DominatorTree *DT_, + AssumptionCache *AC_, DemandedBits *DB_) { + SE = SE_; + TTI = TTI_; + TLI = TLI_; + AA = AA_; + LI = LI_; + DT = DT_; + AC = AC_; + DB = DB_; + DL = &F.getParent()->getDataLayout(); - /// \brief Try to vectorize a list of operands. - /// \@param BuildVector A list of users to ignore for the purpose of - /// scheduling and that don't need extracting. - /// \returns true if a value was vectorized. - bool tryToVectorizeList(ArrayRef VL, BoUpSLP &R, - ArrayRef BuildVector = None, - bool allowReorder = false); + Stores.clear(); + GEPs.clear(); + bool Changed = false; - /// \brief Try to vectorize a chain that may start at the operands of \V; - bool tryToVectorize(BinaryOperator *V, BoUpSLP &R); + // If the target claims to have no vector registers don't attempt + // vectorization. + if (!TTI->getNumberOfRegisters(true)) + return false; - /// \brief Vectorize the store instructions collected in Stores. - bool vectorizeStoreChains(BoUpSLP &R); + // Don't vectorize when the attribute NoImplicitFloat is used. + if (F.hasFnAttribute(Attribute::NoImplicitFloat)) + return false; - /// \brief Vectorize the index computations of the getelementptr instructions - /// collected in GEPs. - bool vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R); + DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n"); - /// \brief Scan the basic block and look for patterns that are likely to start - /// a vectorization chain. - bool vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R); + // Use the bottom up slp vectorizer to construct chains that start with + // store instructions. + BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC, DB, DL); - bool vectorizeStoreChain(ArrayRef Chain, int CostThreshold, - BoUpSLP &R, unsigned VecRegSize); + // A general note: the vectorizer must use BoUpSLP::eraseInstruction() to + // delete instructions. - bool vectorizeStores(ArrayRef Stores, int costThreshold, - BoUpSLP &R); + // Scan the blocks in the function in post order. + for (auto BB : post_order(&F.getEntryBlock())) { + collectSeedInstructions(BB); - /// The store instructions in a basic block organized by base pointer. - StoreListMap Stores; + // Vectorize trees that end at stores. + if (!Stores.empty()) { + DEBUG(dbgs() << "SLP: Found stores for " << Stores.size() + << " underlying objects.\n"); + Changed |= vectorizeStoreChains(R); + } - /// The getelementptr instructions in a basic block organized by base pointer. - WeakVHListMap GEPs; -}; + // Vectorize trees that end at reductions. + Changed |= vectorizeChainsInBlock(BB, R); + + // Vectorize the index computations of getelementptr instructions. This + // is primarily intended to catch gather-like idioms ending at + // non-consecutive loads. + if (!GEPs.empty()) { + DEBUG(dbgs() << "SLP: Found GEPs for " << GEPs.size() + << " underlying objects.\n"); + Changed |= vectorizeGEPIndices(BB, R); + } + } + + if (Changed) { + R.optimizeGatherSequence(); + DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n"); + DEBUG(verifyFunction(F)); + } + return Changed; +} /// \brief Check that the Values in the slice in VL array are still existent in /// the WeakVH array. @@ -3649,9 +3626,9 @@ static bool hasValueBeenRAUWed(ArrayRef VL, ArrayRef VH, return !std::equal(VL.begin(), VL.end(), VH.begin()); } -bool SLPVectorizer::vectorizeStoreChain(ArrayRef Chain, - int CostThreshold, BoUpSLP &R, - unsigned VecRegSize) { +bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef Chain, + int CostThreshold, BoUpSLP &R, + unsigned VecRegSize) { unsigned ChainLen = Chain.size(); DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen << "\n"); @@ -3697,8 +3674,8 @@ bool SLPVectorizer::vectorizeStoreChain(ArrayRef Chain, return Changed; } -bool SLPVectorizer::vectorizeStores(ArrayRef Stores, - int costThreshold, BoUpSLP &R) { +bool SLPVectorizerPass::vectorizeStores(ArrayRef Stores, + int costThreshold, BoUpSLP &R) { SetVector Heads, Tails; SmallDenseMap ConsecutiveChain; @@ -3766,7 +3743,7 @@ bool SLPVectorizer::vectorizeStores(ArrayRef Stores, return Changed; } -void SLPVectorizer::collectSeedInstructions(BasicBlock *BB) { +void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) { // Initialize the collections. We will make a single pass over the block. Stores.clear(); @@ -3801,16 +3778,16 @@ void SLPVectorizer::collectSeedInstructions(BasicBlock *BB) { } } -bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) { +bool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) { if (!A || !B) return false; Value *VL[] = { A, B }; return tryToVectorizeList(VL, R, None, true); } -bool SLPVectorizer::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, - ArrayRef BuildVector, - bool allowReorder) { +bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, + ArrayRef BuildVector, + bool allowReorder) { if (VL.size() < 2) return false; @@ -3912,7 +3889,7 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, return Changed; } -bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) { +bool SLPVectorizerPass::tryToVectorize(BinaryOperator *V, BoUpSLP &R) { if (!V) return false; @@ -4397,7 +4374,7 @@ static bool canMatchHorizontalReduction(PHINode *P, BinaryOperator *BI, return HorRdx.tryToReduce(R, TTI); } -bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { +bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { bool Changed = false; SmallVector Incoming; SmallSet VisitedInstrs; @@ -4595,7 +4572,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { return Changed; } -bool SLPVectorizer::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) { +bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) { auto Changed = false; for (auto &Entry : GEPs) { @@ -4678,7 +4655,7 @@ bool SLPVectorizer::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) { return Changed; } -bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) { +bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) { bool Changed = false; // Attempt to sort and vectorize each of the store-groups. for (StoreListMap::iterator it = Stores.begin(), e = Stores.end(); it != e; @@ -4702,8 +4679,6 @@ bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) { return Changed; } -} // end anonymous namespace - char SLPVectorizer::ID = 0; static const char lv_name[] = "SLP Vectorizer"; INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false) diff --git a/test/Transforms/SLPVectorizer/X86/gep.ll b/test/Transforms/SLPVectorizer/X86/gep.ll index d10f2b6015d..60b0a114c51 100644 --- a/test/Transforms/SLPVectorizer/X86/gep.ll +++ b/test/Transforms/SLPVectorizer/X86/gep.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -basicaa -slp-vectorizer -S |FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes=slp-vectorizer -S |FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-unknown" -- 2.50.1