--- /dev/null
+//===---- SLPVectorizer.h ---------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass implements the Bottom Up SLP vectorizer. It detects consecutive
+// stores that can be put together into vector-stores. Next, it attempts to
+// construct a vectorizable tree using the use-def chains. If a profitable tree
+// was found, the SLP vectorizer performs vectorization on the tree.
+//
+// The pass is inspired by the work described in the paper:
+// "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_SLPVECTORIZER_H
+#define LLVM_TRANSFORMS_SCALAR_SLPVECTORIZER_H
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/DemandedBits.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// A private "module" namespace for types and utilities used by this pass.
+/// These are implementation details and should not be used by clients.
+namespace slpvectorizer {
+class BoUpSLP;
+} // end namespace slpvectorizer
+
+struct SLPVectorizerPass : public PassInfoMixin<SLPVectorizerPass> {
+ typedef SmallVector<StoreInst *, 8> StoreList;
+ typedef MapVector<Value *, StoreList> StoreListMap;
+ typedef SmallVector<WeakVH, 8> WeakVHList;
+ typedef MapVector<Value *, WeakVHList> WeakVHListMap;
+
+ ScalarEvolution *SE = nullptr;
+ TargetTransformInfo *TTI = nullptr;
+ TargetLibraryInfo *TLI = nullptr;
+ AliasAnalysis *AA = nullptr;
+ LoopInfo *LI = nullptr;
+ DominatorTree *DT = nullptr;
+ AssumptionCache *AC = nullptr;
+ DemandedBits *DB = nullptr;
+ const DataLayout *DL = nullptr;
+
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+ // Glue for old PM: the caller supplies the already-computed analyses.
+ bool runImpl(Function &F, ScalarEvolution *SE_, TargetTransformInfo *TTI_,
+ TargetLibraryInfo *TLI_, AliasAnalysis *AA_, LoopInfo *LI_,
+ DominatorTree *DT_, AssumptionCache *AC_, DemandedBits *DB_);
+
+private:
+ /// \brief Collect store and getelementptr instructions and organize them
+ /// according to the underlying object of their pointer operands. We sort the
+ /// instructions by their underlying objects to reduce the cost of
+ /// consecutive access queries.
+ ///
+ /// TODO: We can further reduce this cost if we flush the chain creation
+ /// every time we run into a memory barrier.
+ void collectSeedInstructions(BasicBlock *BB);
+
+ /// \brief Try to vectorize a chain that starts at two arithmetic instrs.
+ bool tryToVectorizePair(Value *A, Value *B, slpvectorizer::BoUpSLP &R);
+
+ /// \brief Try to vectorize a list of operands.
+ /// \param BuildVector A list of users to ignore for the purpose of
+ /// scheduling and that don't need extracting.
+ /// \returns true if a value was vectorized.
+ bool tryToVectorizeList(ArrayRef<Value *> VL, slpvectorizer::BoUpSLP &R,
+ ArrayRef<Value *> BuildVector = None,
+ bool allowReorder = false);
+
+ /// \brief Try to vectorize a chain that may start at the operands of \p V.
+ bool tryToVectorize(BinaryOperator *V, slpvectorizer::BoUpSLP &R);
+
+ /// \brief Vectorize the store instructions collected in Stores.
+ bool vectorizeStoreChains(slpvectorizer::BoUpSLP &R);
+
+ /// \brief Vectorize the index computations of the getelementptr instructions
+ /// collected in GEPs.
+ bool vectorizeGEPIndices(BasicBlock *BB, slpvectorizer::BoUpSLP &R);
+
+ /// \brief Scan the basic block and look for patterns that are likely to start
+ /// a vectorization chain.
+ bool vectorizeChainsInBlock(BasicBlock *BB, slpvectorizer::BoUpSLP &R);
+
+ bool vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold,
+ slpvectorizer::BoUpSLP &R, unsigned VecRegSize);
+
+ bool vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold,
+ slpvectorizer::BoUpSLP &R);
+
+ /// The store instructions in a basic block organized by base pointer.
+ StoreListMap Stores;
+
+ /// The getelementptr instructions in a basic block organized by base pointer.
+ WeakVHListMap GEPs;
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_SLPVECTORIZER_H
// "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks.
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/MapVector.h"
+#include "llvm/Transforms/Scalar/SLPVectorizer.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
-#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include <memory>
using namespace llvm;
+using namespace slpvectorizer;
#define SV_NAME "slp-vectorizer"
#define DEBUG_TYPE "SLP"
"slp-min-reg-size", cl::init(128), cl::Hidden,
cl::desc("Attempt to vectorize for this register size in bits"));
-namespace {
-
// FIXME: Set this via cl::opt to allow overriding.
static const unsigned RecursionMaxDepth = 12;
return true;
}
+namespace llvm {
+namespace slpvectorizer {
/// Bottom Up SLP Vectorizer.
class BoUpSLP {
public:
/// can legally be represented.
MapVector<Value *, uint64_t> MinBWs;
};
+} // end namespace slpvectorizer
+} // end namespace llvm
#ifndef NDEBUG
-raw_ostream &operator<<(raw_ostream &os, const BoUpSLP::ScheduleData &SD) {
+/// Stream insertion for ScheduleData: writes \p SD to \p os through
+/// ScheduleData::dump() and returns \p os so insertions can be chained.
+raw_ostream &llvm::slpvectorizer::operator<<(raw_ostream &os,
+ const BoUpSLP::ScheduleData &SD) {
SD.dump(os);
return os;
}
/// The SLPVectorizer Pass.
struct SLPVectorizer : public FunctionPass {
- typedef SmallVector<StoreInst *, 8> StoreList;
- typedef MapVector<Value *, StoreList> StoreListMap;
- typedef SmallVector<WeakVH, 8> WeakVHList;
- typedef MapVector<Value *, WeakVHList> WeakVHListMap;
+ SLPVectorizerPass Impl;
/// Pass identification, replacement for typeid
static char ID;
initializeSLPVectorizerPass(*PassRegistry::getPassRegistry());
}
- ScalarEvolution *SE;
- TargetTransformInfo *TTI;
- TargetLibraryInfo *TLI;
- AliasAnalysis *AA;
- LoopInfo *LI;
- DominatorTree *DT;
- AssumptionCache *AC;
- DemandedBits *DB;
- const DataLayout *DL;
bool doInitialization(Module &M) override {
- DL = &M.getDataLayout();
return false;
}
if (skipFunction(F))
return false;
- SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- TLI = TLIP ? &TLIP->getTLI() : nullptr;
- AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits();
-
- Stores.clear();
- GEPs.clear();
- bool Changed = false;
-
- // If the target claims to have no vector registers don't attempt
- // vectorization.
- if (!TTI->getNumberOfRegisters(true))
- return false;
-
- // Don't vectorize when the attribute NoImplicitFloat is used.
- if (F.hasFnAttribute(Attribute::NoImplicitFloat))
- return false;
-
- DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n");
-
- // Use the bottom up slp vectorizer to construct chains that start with
- // store instructions.
- BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC, DB, DL);
-
- // A general note: the vectorizer must use BoUpSLP::eraseInstruction() to
- // delete instructions.
-
- // Scan the blocks in the function in post order.
- for (auto BB : post_order(&F.getEntryBlock())) {
- collectSeedInstructions(BB);
-
- // Vectorize trees that end at stores.
- if (!Stores.empty()) {
- DEBUG(dbgs() << "SLP: Found stores for " << Stores.size()
- << " underlying objects.\n");
- Changed |= vectorizeStoreChains(R);
- }
-
- // Vectorize trees that end at reductions.
- Changed |= vectorizeChainsInBlock(BB, R);
-
- // Vectorize the index computations of getelementptr instructions. This
- // is primarily intended to catch gather-like idioms ending at
- // non-consecutive loads.
- if (!GEPs.empty()) {
- DEBUG(dbgs() << "SLP: Found GEPs for " << GEPs.size()
- << " underlying objects.\n");
- Changed |= vectorizeGEPIndices(BB, R);
- }
- }
-
- if (Changed) {
- R.optimizeGatherSequence();
- DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n");
- DEBUG(verifyFunction(F));
- }
- return Changed;
+ auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
+ auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ auto *DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits();
+
+ return Impl.runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addPreserved<GlobalsAAWrapperPass>();
AU.setPreservesCFG();
}
+};
-private:
- /// \brief Collect store and getelementptr instructions and organize them
- /// according to the underlying object of their pointer operands. We sort the
- /// instructions by their underlying objects to reduce the cost of
- /// consecutive access queries.
- ///
- /// TODO: We can further reduce this cost if we flush the chain creation
- /// every time we run into a memory barrier.
- void collectSeedInstructions(BasicBlock *BB);
+/// New pass manager entry point: gathers the analyses for \p F, hands them to
+/// runImpl() and reports which analyses remain valid afterwards.
+PreservedAnalyses SLPVectorizerPass::run(Function &F,
+                                         FunctionAnalysisManager &AM) {
+  auto &ScalarEvo = AM.getResult<ScalarEvolutionAnalysis>(F);
+  auto &TargetInfo = AM.getResult<TargetIRAnalysis>(F);
+  // Only a cached result is requested for the library info, so this pointer
+  // may be null; the legacy wrapper likewise passes null when it is missing.
+  auto *LibInfo = AM.getCachedResult<TargetLibraryAnalysis>(F);
+  auto &Alias = AM.getResult<AAManager>(F);
+  auto &Loops = AM.getResult<LoopAnalysis>(F);
+  auto &DomTree = AM.getResult<DominatorTreeAnalysis>(F);
+  auto &Assumptions = AM.getResult<AssumptionAnalysis>(F);
+  auto &Demanded = AM.getResult<DemandedBitsAnalysis>(F);
+
+  if (runImpl(F, &ScalarEvo, &TargetInfo, LibInfo, &Alias, &Loops, &DomTree,
+              &Assumptions, &Demanded)) {
+    // Something was vectorized: keep only the analyses listed here.
+    PreservedAnalyses Kept;
+    Kept.preserve<LoopAnalysis>();
+    Kept.preserve<DominatorTreeAnalysis>();
+    Kept.preserve<AAManager>();
+    Kept.preserve<GlobalsAA>();
+    return Kept;
+  }
+
+  // Nothing changed, so every analysis is still valid.
+  return PreservedAnalyses::all();
+}
- /// \brief Try to vectorize a chain that starts at two arithmetic instrs.
- bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);
+/// Shared implementation behind both pass-manager front ends. Stores the
+/// supplied analyses (and the DataLayout of F's parent module) in the pass
+/// members, then visits the blocks reachable from the entry block in post
+/// order, trying to vectorize store chains, in-block chains and
+/// getelementptr index computations.
+///
+/// \returns true if any of the vectorization attempts reported a change.
+bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
+ TargetTransformInfo *TTI_,
+ TargetLibraryInfo *TLI_, AliasAnalysis *AA_,
+ LoopInfo *LI_, DominatorTree *DT_,
+ AssumptionCache *AC_, DemandedBits *DB_) {
+ SE = SE_;
+ TTI = TTI_;
+ TLI = TLI_;
+ AA = AA_;
+ LI = LI_;
+ DT = DT_;
+ AC = AC_;
+ DB = DB_;
+ DL = &F.getParent()->getDataLayout();
- /// \brief Try to vectorize a list of operands.
- /// \@param BuildVector A list of users to ignore for the purpose of
- /// scheduling and that don't need extracting.
- /// \returns true if a value was vectorized.
- bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
- ArrayRef<Value *> BuildVector = None,
- bool allowReorder = false);
+ Stores.clear();
+ GEPs.clear();
+ bool Changed = false;
- /// \brief Try to vectorize a chain that may start at the operands of \V;
- bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
+ // If the target claims to have no vector registers don't attempt
+ // vectorization.
+ if (!TTI->getNumberOfRegisters(true))
+ return false;
- /// \brief Vectorize the store instructions collected in Stores.
- bool vectorizeStoreChains(BoUpSLP &R);
+ // Don't vectorize when the attribute NoImplicitFloat is used.
+ if (F.hasFnAttribute(Attribute::NoImplicitFloat))
+ return false;
- /// \brief Vectorize the index computations of the getelementptr instructions
- /// collected in GEPs.
- bool vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R);
+ DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n");
- /// \brief Scan the basic block and look for patterns that are likely to start
- /// a vectorization chain.
- bool vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R);
+ // Use the bottom up slp vectorizer to construct chains that start with
+ // store instructions.
+ BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC, DB, DL);
- bool vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold,
- BoUpSLP &R, unsigned VecRegSize);
+ // A general note: the vectorizer must use BoUpSLP::eraseInstruction() to
+ // delete instructions.
- bool vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold,
- BoUpSLP &R);
+ // Scan the blocks in the function in post order.
+ for (auto BB : post_order(&F.getEntryBlock())) {
+ collectSeedInstructions(BB);
- /// The store instructions in a basic block organized by base pointer.
- StoreListMap Stores;
+ // Vectorize trees that end at stores.
+ if (!Stores.empty()) {
+ DEBUG(dbgs() << "SLP: Found stores for " << Stores.size()
+ << " underlying objects.\n");
+ Changed |= vectorizeStoreChains(R);
+ }
- /// The getelementptr instructions in a basic block organized by base pointer.
- WeakVHListMap GEPs;
-};
+ // Vectorize trees that end at reductions.
+ Changed |= vectorizeChainsInBlock(BB, R);
+
+ // Vectorize the index computations of getelementptr instructions. This
+ // is primarily intended to catch gather-like idioms ending at
+ // non-consecutive loads.
+ if (!GEPs.empty()) {
+ DEBUG(dbgs() << "SLP: Found GEPs for " << GEPs.size()
+ << " underlying objects.\n");
+ Changed |= vectorizeGEPIndices(BB, R);
+ }
+ }
+
+ if (Changed) {
+ R.optimizeGatherSequence();
+ DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n");
+ DEBUG(verifyFunction(F));
+ }
+ return Changed;
+}
/// \brief Check that the Values in the slice in VL array are still existent in
/// the WeakVH array.
return !std::equal(VL.begin(), VL.end(), VH.begin());
}
-bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
- int CostThreshold, BoUpSLP &R,
- unsigned VecRegSize) {
+bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain,
+ int CostThreshold, BoUpSLP &R,
+ unsigned VecRegSize) {
unsigned ChainLen = Chain.size();
DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen
<< "\n");
return Changed;
}
-bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
- int costThreshold, BoUpSLP &R) {
+bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
+ int costThreshold, BoUpSLP &R) {
SetVector<StoreInst *> Heads, Tails;
SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
return Changed;
}
-void SLPVectorizer::collectSeedInstructions(BasicBlock *BB) {
+void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) {
// Initialize the collections. We will make a single pass over the block.
Stores.clear();
}
}
-bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
+// Packs A and B into a two-element list and forwards it to
+// tryToVectorizeList() with reordering allowed; null operands are rejected.
+bool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
if (!A || !B)
return false;
Value *VL[] = { A, B };
return tryToVectorizeList(VL, R, None, true);
}
-bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
- ArrayRef<Value *> BuildVector,
- bool allowReorder) {
+bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
+ ArrayRef<Value *> BuildVector,
+ bool allowReorder) {
if (VL.size() < 2)
return false;
return Changed;
}
-bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
+bool SLPVectorizerPass::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
if (!V)
return false;
return HorRdx.tryToReduce(R, TTI);
}
-bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
+bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
bool Changed = false;
SmallVector<Value *, 4> Incoming;
SmallSet<Value *, 16> VisitedInstrs;
return Changed;
}
-bool SLPVectorizer::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
+bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
auto Changed = false;
for (auto &Entry : GEPs) {
return Changed;
}
-bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {
+bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
bool Changed = false;
// Attempt to sort and vectorize each of the store-groups.
for (StoreListMap::iterator it = Stores.begin(), e = Stores.end(); it != e;
return Changed;
}
-} // end anonymous namespace
-
char SLPVectorizer::ID = 0;
static const char lv_name[] = "SLP Vectorizer";
INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)