From 8b3e014d452e902463a28f7950cc7ff7049f8f70 Mon Sep 17 00:00:00 2001 From: Dehao Chen Date: Thu, 5 May 2016 00:54:54 +0000 Subject: [PATCH] clang-format some files in preparation of coming patch reviews. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268583 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Transforms/Utils/LoopUtils.h | 21 +- lib/Transforms/Scalar/LICM.cpp | 400 ++++---- lib/Transforms/Scalar/LoopUnrollPass.cpp | 61 +- lib/Transforms/Vectorize/LoopVectorize.cpp | 1009 ++++++++++---------- 4 files changed, 724 insertions(+), 767 deletions(-) diff --git a/include/llvm/Transforms/Utils/LoopUtils.h b/include/llvm/Transforms/Utils/LoopUtils.h index ff0d673d16e..b91a313ea02 100644 --- a/include/llvm/Transforms/Utils/LoopUtils.h +++ b/include/llvm/Transforms/Utils/LoopUtils.h @@ -37,13 +37,12 @@ class TargetLibraryInfo; /// \brief Captures loop safety information. /// It keep information for loop & its header may throw exception. struct LICMSafetyInfo { - bool MayThrow; // The current loop contains an instruction which - // may throw. - bool HeaderMayThrow; // Same as previous, but specific to loop header + bool MayThrow; // The current loop contains an instruction which + // may throw. + bool HeaderMayThrow; // Same as previous, but specific to loop header // Used to update funclet bundle operands. DenseMap BlockColors; - LICMSafetyInfo() : MayThrow(false), HeaderMayThrow(false) - {} + LICMSafetyInfo() : MayThrow(false), HeaderMayThrow(false) {} }; /// The RecurrenceDescriptor is used to identify recurrences variables in a @@ -268,7 +267,7 @@ public: public: /// Default constructor - creates an invalid induction. InductionDescriptor() - : StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {} + : StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {} /// Get the consecutive direction. Returns: /// 0 - unknown or non-consecutive. @@ -325,8 +324,7 @@ bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, /// If ScalarEvolution is passed in, it will be preserved. /// /// Returns true if any modifications are made to the loop. -bool formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, - ScalarEvolution *SE); +bool formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE); /// \brief Put a loop nest into LCSSA form. /// @@ -370,8 +368,8 @@ bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, /// insertion point vector, PredIteratorCache, LoopInfo, DominatorTree, Loop, /// AliasSet information for all instructions of the loop and loop safety /// information as arguments. It returns changed status. -bool promoteLoopAccessesToScalars(AliasSet &, SmallVectorImpl &, - SmallVectorImpl &, +bool promoteLoopAccessesToScalars(AliasSet &, SmallVectorImpl &, + SmallVectorImpl &, PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *, Loop *, AliasSetTracker *, LICMSafetyInfo *); @@ -394,7 +392,7 @@ Optional findStringMetadataForLoop(Loop *TheLoop, StringRef Name); /// \brief Set input string into loop metadata by keeping other values intact. -void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, +void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V = 0); /// Helper to consistently add the set of standard passes to a loop pass's \c @@ -403,7 +401,6 @@ void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, /// All loop passes should call this as part of implementing their \c /// getAnalysisUsage. void getLoopAnalysisUsage(AnalysisUsage &AU); - } #endif diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index e6ad42bb4c3..d2969453dff 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -30,7 +30,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" @@ -59,6 +58,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -67,15 +67,15 @@ using namespace llvm; #define DEBUG_TYPE "licm" -STATISTIC(NumSunk , "Number of instructions sunk out of loop"); -STATISTIC(NumHoisted , "Number of instructions hoisted out of loop"); +STATISTIC(NumSunk, "Number of instructions sunk out of loop"); +STATISTIC(NumHoisted, "Number of instructions hoisted out of loop"); STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk"); STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk"); -STATISTIC(NumPromoted , "Number of memory locations promoted to registers"); +STATISTIC(NumPromoted, "Number of memory locations promoted to registers"); static cl::opt -DisablePromotion("disable-licm-promotion", cl::Hidden, - cl::desc("Disable memory promotion in LICM pass")); + DisablePromotion("disable-licm-promotion", cl::Hidden, + cl::desc("Disable memory promotion in LICM pass")); static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop, @@ -86,8 +86,7 @@ static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, const Loop *CurLoop, AliasSetTracker *CurAST, const LICMSafetyInfo *SafetyInfo); static bool isGuaranteedToExecute(const Instruction &Inst, - const DominatorTree *DT, - const Loop *CurLoop, + const DominatorTree *DT, const Loop *CurLoop, const LICMSafetyInfo *SafetyInfo); static bool isSafeToExecuteUnconditionally(const Instruction &Inst, const DominatorTree *DT, @@ -96,7 +95,7 @@ static bool isSafeToExecuteUnconditionally(const Instruction &Inst, const LICMSafetyInfo *SafetyInfo, const Instruction *CtxI = nullptr); static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, - const AAMDNodes &AAInfo, + const AAMDNodes &AAInfo, AliasSetTracker *CurAST); static Instruction * CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, @@ -108,57 +107,57 @@ static bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, LICMSafetyInfo *SafetyInfo); namespace { - struct LICM : public LoopPass { - static char ID; // Pass identification, replacement for typeid - LICM() : LoopPass(ID) { - initializeLICMPass(*PassRegistry::getPassRegistry()); - } +struct LICM : public LoopPass { + static char ID; // Pass identification, replacement for typeid + LICM() : LoopPass(ID) { + initializeLICMPass(*PassRegistry::getPassRegistry()); + } - bool runOnLoop(Loop *L, LPPassManager &LPM) override; + bool runOnLoop(Loop *L, LPPassManager &LPM) override; - /// This transformation requires natural loop information & requires that - /// loop preheaders be inserted into the CFG... - /// - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired(); - getLoopAnalysisUsage(AU); - } + /// This transformation requires natural loop information & requires that + /// loop preheaders be inserted into the CFG... + /// + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired(); + getLoopAnalysisUsage(AU); + } - using llvm::Pass::doFinalization; + using llvm::Pass::doFinalization; - bool doFinalization() override { - assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets"); - return false; - } + bool doFinalization() override { + assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets"); + return false; + } - private: - AliasAnalysis *AA; // Current AliasAnalysis information - LoopInfo *LI; // Current LoopInfo - DominatorTree *DT; // Dominator Tree for the current Loop. +private: + AliasAnalysis *AA; // Current AliasAnalysis information + LoopInfo *LI; // Current LoopInfo + DominatorTree *DT; // Dominator Tree for the current Loop. - TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding. + TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding. - // State that is updated as we process loops. - bool Changed; // Set to true when we change anything. - BasicBlock *Preheader; // The preheader block of the current loop... - Loop *CurLoop; // The current loop we are working on... - AliasSetTracker *CurAST; // AliasSet information for the current loop... - DenseMap LoopToAliasSetMap; + // State that is updated as we process loops. + bool Changed; // Set to true when we change anything. + BasicBlock *Preheader; // The preheader block of the current loop... + Loop *CurLoop; // The current loop we are working on... + AliasSetTracker *CurAST; // AliasSet information for the current loop... + DenseMap LoopToAliasSetMap; - /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. - void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, - Loop *L) override; + /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. + void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, + Loop *L) override; - /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias - /// set. - void deleteAnalysisValue(Value *V, Loop *L) override; + /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias + /// set. + void deleteAnalysisValue(Value *V, Loop *L) override; - /// Simple Analysis hook. Delete loop L from alias set map. - void deleteAnalysisLoop(Loop *L) override; + /// Simple Analysis hook. Delete loop L from alias set map. + void deleteAnalysisLoop(Loop *L) override; - AliasSetTracker *collectAliasInfoForLoop(Loop *L); - }; + AliasSetTracker *collectAliasInfoForLoop(Loop *L); +}; } char LICM::ID = 0; @@ -225,9 +224,9 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // Loop over all of the alias sets in the tracker object. for (AliasSet &AS : *CurAST) - Changed |= promoteLoopAccessesToScalars(AS, ExitBlocks, InsertPts, - PIC, LI, DT, TLI, CurLoop, - CurAST, &SafetyInfo); + Changed |= + promoteLoopAccessesToScalars(AS, ExitBlocks, InsertPts, PIC, LI, DT, + TLI, CurLoop, CurAST, &SafetyInfo); // Once we have promoted values across the loop body we have to recursively // reform LCSSA as any nested loop may now have values defined within the @@ -266,7 +265,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { } /// Walk the specified region of the CFG (defined by all blocks dominated by -/// the specified block, and that are in the current loop) in reverse depth +/// the specified block, and that are in the current loop) in reverse depth /// first order w.r.t the DominatorTree. This allows us to visit uses before /// definitions, allowing us to sink a loop body in one pass without iteration. /// @@ -275,25 +274,27 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) { // Verify inputs. - assert(N != nullptr && AA != nullptr && LI != nullptr && - DT != nullptr && CurLoop != nullptr && CurAST != nullptr && - SafetyInfo != nullptr && "Unexpected input to sinkRegion"); + assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && + CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && + "Unexpected input to sinkRegion"); BasicBlock *BB = N->getBlock(); // If this subregion is not in the top level loop at all, exit. - if (!CurLoop->contains(BB)) return false; + if (!CurLoop->contains(BB)) + return false; // We are processing blocks in reverse dfo, so process children first. bool Changed = false; - const std::vector &Children = N->getChildren(); + const std::vector &Children = N->getChildren(); for (DomTreeNode *Child : Children) Changed |= sinkRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo); // Only need to process the contents of this block if it is not part of a // subloop (which would already have been processed). - if (inSubLoop(BB,CurLoop,LI)) return Changed; + if (inSubLoop(BB, CurLoop, LI)) + return Changed; - for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) { + for (BasicBlock::iterator II = BB->end(); II != BB->begin();) { Instruction &I = *--II; // If the instruction is dead, we would try to sink it because it isn't used @@ -330,20 +331,21 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) { // Verify inputs. - assert(N != nullptr && AA != nullptr && LI != nullptr && - DT != nullptr && CurLoop != nullptr && CurAST != nullptr && - SafetyInfo != nullptr && "Unexpected input to hoistRegion"); + assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && + CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && + "Unexpected input to hoistRegion"); BasicBlock *BB = N->getBlock(); // If this subregion is not in the top level loop at all, exit. - if (!CurLoop->contains(BB)) return false; + if (!CurLoop->contains(BB)) + return false; // Only need to process the contents of this block if it is not part of a // subloop (which would already have been processed). bool Changed = false; if (!inSubLoop(BB, CurLoop, LI)) - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) { + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { Instruction &I = *II++; // Try constant folding this instruction. If all the operands are // constants, it is technically hoistable, but it would be better to just @@ -364,12 +366,13 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, // if (CurLoop->hasLoopInvariantOperands(&I) && canSinkOrHoistInst(I, AA, DT, TLI, CurLoop, CurAST, SafetyInfo) && - isSafeToExecuteUnconditionally(I, DT, TLI, CurLoop, SafetyInfo, - CurLoop->getLoopPreheader()->getTerminator())) + isSafeToExecuteUnconditionally( + I, DT, TLI, CurLoop, SafetyInfo, + CurLoop->getLoopPreheader()->getTerminator())) Changed |= hoist(I, DT, CurLoop, SafetyInfo); } - const std::vector &Children = N->getChildren(); + const std::vector &Children = N->getChildren(); for (DomTreeNode *Child : Children) Changed |= hoistRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo); return Changed; @@ -378,7 +381,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, /// Computes loop safety information, checks loop body & header /// for the possibility of may throw exception. /// -void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) { +void llvm::computeLICMSafetyInfo(LICMSafetyInfo *SafetyInfo, Loop *CurLoop) { assert(CurLoop != nullptr && "CurLoop cant be null"); BasicBlock *Header = CurLoop->getHeader(); // Setting default safety values. @@ -388,11 +391,12 @@ void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) { for (BasicBlock::iterator I = Header->begin(), E = Header->end(); (I != E) && !SafetyInfo->HeaderMayThrow; ++I) SafetyInfo->HeaderMayThrow |= I->mayThrow(); - + SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow; - // Iterate over loop instructions and compute safety info. - for (Loop::block_iterator BB = CurLoop->block_begin(), - BBE = CurLoop->block_end(); (BB != BBE) && !SafetyInfo->MayThrow ; ++BB) + // Iterate over loop instructions and compute safety info. + for (Loop::block_iterator BB = CurLoop->block_begin(), + BBE = CurLoop->block_end(); + (BB != BBE) && !SafetyInfo->MayThrow; ++BB) for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); (I != E) && !SafetyInfo->MayThrow; ++I) SafetyInfo->MayThrow |= I->mayThrow(); @@ -415,7 +419,7 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT, // Loads have extra constraints we have to verify before we can hoist them. if (LoadInst *LI = dyn_cast(&I)) { if (!LI->isUnordered()) - return false; // Don't hoist volatile/atomic loads! + return false; // Don't hoist volatile/atomic loads! // Loads from constant memory are always safe to move, even if they end up // in the same alias set as something that ends up being modified. @@ -467,7 +471,8 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT, break; } } - if (!FoundMod) return true; + if (!FoundMod) + return true; } // FIXME: This should use mod/ref information to see if we can hoist or @@ -486,7 +491,7 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT, // TODO: Plumb the context instruction through to make hoisting and sinking // more powerful. Hoisting of loads already works due to the special casing - // above. + // above. return isSafeToExecuteUnconditionally(I, DT, TLI, CurLoop, SafetyInfo, nullptr); } @@ -589,7 +594,8 @@ CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, } ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New); - if (!I.getName().empty()) New->setName(I.getName() + ".le"); + if (!I.getName().empty()) + New->setName(I.getName() + ".le"); // Build LCSSA PHI nodes for any in-loop operands. Note that this is // particularly cheap because we can rip off the PHI node that we're @@ -623,15 +629,17 @@ static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, const LICMSafetyInfo *SafetyInfo) { DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); bool Changed = false; - if (isa(I)) ++NumMovedLoads; - else if (isa(I)) ++NumMovedCalls; + if (isa(I)) + ++NumMovedLoads; + else if (isa(I)) + ++NumMovedCalls; ++NumSunk; Changed = true; #ifndef NDEBUG SmallVector ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); - SmallPtrSet ExitBlockSet(ExitBlocks.begin(), + SmallPtrSet ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end()); #endif @@ -688,8 +696,8 @@ static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, const LICMSafetyInfo *SafetyInfo) { auto *Preheader = CurLoop->getLoopPreheader(); - DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " - << I << "\n"); + DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I + << "\n"); // Metadata can be dependent on conditions we are hoisting above. // Conservatively strip all metadata on the instruction unless we were @@ -705,8 +713,10 @@ static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, // Move the new node to the Preheader, before its terminator. I.moveBefore(Preheader->getTerminator()); - if (isa(I)) ++NumMovedLoads; - else if (isa(I)) ++NumMovedCalls; + if (isa(I)) + ++NumMovedLoads; + else if (isa(I)) + ++NumMovedCalls; ++NumHoisted; return true; } @@ -714,7 +724,7 @@ static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, /// Only sink or hoist an instruction if it is not a trapping instruction, /// or if the instruction is known not to trap when moved to the preheader. /// or if it is a trapping instruction and is guaranteed to execute. -static bool isSafeToExecuteUnconditionally(const Instruction &Inst, +static bool isSafeToExecuteUnconditionally(const Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI, const Loop *CurLoop, @@ -727,9 +737,8 @@ static bool isSafeToExecuteUnconditionally(const Instruction &Inst, } static bool isGuaranteedToExecute(const Instruction &Inst, - const DominatorTree *DT, - const Loop *CurLoop, - const LICMSafetyInfo * SafetyInfo) { + const DominatorTree *DT, const Loop *CurLoop, + const LICMSafetyInfo *SafetyInfo) { // We have to check to make sure that the instruction dominates all // of the exit blocks. If it doesn't, then there is a path out of the loop @@ -749,7 +758,7 @@ static bool isGuaranteedToExecute(const Instruction &Inst, return false; // Get the exit blocks for the current loop. - SmallVector ExitBlocks; + SmallVector ExitBlocks; CurLoop->getExitBlocks(ExitBlocks); // Verify that the block dominates each of the exit blocks of the loop. @@ -766,82 +775,79 @@ static bool isGuaranteedToExecute(const Instruction &Inst, } namespace { - class LoopPromoter : public LoadAndStorePromoter { - Value *SomePtr; // Designated pointer to store to. - SmallPtrSetImpl &PointerMustAliases; - SmallVectorImpl &LoopExitBlocks; - SmallVectorImpl &LoopInsertPts; - PredIteratorCache &PredCache; - AliasSetTracker &AST; - LoopInfo &LI; - DebugLoc DL; - int Alignment; - AAMDNodes AATags; - - Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const { - if (Instruction *I = dyn_cast(V)) - if (Loop *L = LI.getLoopFor(I->getParent())) - if (!L->contains(BB)) { - // We need to create an LCSSA PHI node for the incoming value and - // store that. - PHINode *PN = - PHINode::Create(I->getType(), PredCache.size(BB), - I->getName() + ".lcssa", &BB->front()); - for (BasicBlock *Pred : PredCache.get(BB)) - PN->addIncoming(I, Pred); - return PN; - } - return V; - } +class LoopPromoter : public LoadAndStorePromoter { + Value *SomePtr; // Designated pointer to store to. + SmallPtrSetImpl &PointerMustAliases; + SmallVectorImpl &LoopExitBlocks; + SmallVectorImpl &LoopInsertPts; + PredIteratorCache &PredCache; + AliasSetTracker &AST; + LoopInfo &LI; + DebugLoc DL; + int Alignment; + AAMDNodes AATags; - public: - LoopPromoter(Value *SP, - ArrayRef Insts, - SSAUpdater &S, SmallPtrSetImpl &PMA, - SmallVectorImpl &LEB, - SmallVectorImpl &LIP, PredIteratorCache &PIC, - AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment, - const AAMDNodes &AATags) - : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), - LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast), - LI(li), DL(dl), Alignment(alignment), AATags(AATags) {} - - bool isInstInList(Instruction *I, - const SmallVectorImpl &) const override { - Value *Ptr; - if (LoadInst *LI = dyn_cast(I)) - Ptr = LI->getOperand(0); - else - Ptr = cast(I)->getPointerOperand(); - return PointerMustAliases.count(Ptr); - } + Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const { + if (Instruction *I = dyn_cast(V)) + if (Loop *L = LI.getLoopFor(I->getParent())) + if (!L->contains(BB)) { + // We need to create an LCSSA PHI node for the incoming value and + // store that. + PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB), + I->getName() + ".lcssa", &BB->front()); + for (BasicBlock *Pred : PredCache.get(BB)) + PN->addIncoming(I, Pred); + return PN; + } + return V; + } - void doExtraRewritesBeforeFinalDeletion() const override { - // Insert stores after in the loop exit blocks. Each exit block gets a - // store of the live-out values that feed them. Since we've already told - // the SSA updater about the defs in the loop and the preheader - // definition, it is all set and we can start using it. - for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) { - BasicBlock *ExitBlock = LoopExitBlocks[i]; - Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); - LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock); - Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock); - Instruction *InsertPos = LoopInsertPts[i]; - StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos); - NewSI->setAlignment(Alignment); - NewSI->setDebugLoc(DL); - if (AATags) NewSI->setAAMetadata(AATags); - } - } +public: + LoopPromoter(Value *SP, ArrayRef Insts, SSAUpdater &S, + SmallPtrSetImpl &PMA, + SmallVectorImpl &LEB, + SmallVectorImpl &LIP, PredIteratorCache &PIC, + AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment, + const AAMDNodes &AATags) + : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), + LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast), + LI(li), DL(dl), Alignment(alignment), AATags(AATags) {} + + bool isInstInList(Instruction *I, + const SmallVectorImpl &) const override { + Value *Ptr; + if (LoadInst *LI = dyn_cast(I)) + Ptr = LI->getOperand(0); + else + Ptr = cast(I)->getPointerOperand(); + return PointerMustAliases.count(Ptr); + } - void replaceLoadWithValue(LoadInst *LI, Value *V) const override { - // Update alias analysis. - AST.copyValue(LI, V); + void doExtraRewritesBeforeFinalDeletion() const override { + // Insert stores after in the loop exit blocks. Each exit block gets a + // store of the live-out values that feed them. Since we've already told + // the SSA updater about the defs in the loop and the preheader + // definition, it is all set and we can start using it. + for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) { + BasicBlock *ExitBlock = LoopExitBlocks[i]; + Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); + LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock); + Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock); + Instruction *InsertPos = LoopInsertPts[i]; + StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos); + NewSI->setAlignment(Alignment); + NewSI->setDebugLoc(DL); + if (AATags) + NewSI->setAAMetadata(AATags); } - void instructionDeleted(Instruction *I) const override { - AST.deleteValue(I); - } - }; + } + + void replaceLoadWithValue(LoadInst *LI, Value *V) const override { + // Update alias analysis. + AST.copyValue(LI, V); + } + void instructionDeleted(Instruction *I) const override { AST.deleteValue(I); } +}; } // end anon namespace /// Try to promote memory values to scalars by sinking stores out of the @@ -849,19 +855,14 @@ namespace { /// the stores in the loop, looking for stores to Must pointers which are /// loop invariant. /// -bool llvm::promoteLoopAccessesToScalars(AliasSet &AS, - SmallVectorImpl&ExitBlocks, - SmallVectorImpl&InsertPts, - PredIteratorCache &PIC, LoopInfo *LI, - DominatorTree *DT, - const TargetLibraryInfo *TLI, - Loop *CurLoop, - AliasSetTracker *CurAST, - LICMSafetyInfo * SafetyInfo) { +bool llvm::promoteLoopAccessesToScalars( + AliasSet &AS, SmallVectorImpl &ExitBlocks, + SmallVectorImpl &InsertPts, PredIteratorCache &PIC, + LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, + Loop *CurLoop, AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) { // Verify inputs. - assert(LI != nullptr && DT != nullptr && - CurLoop != nullptr && CurAST != nullptr && - SafetyInfo != nullptr && + assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && + CurAST != nullptr && SafetyInfo != nullptr && "Unexpected Input to promoteLoopAccessesToScalars"); // We can promote this alias set if it has a store, if it is a "Must" alias @@ -875,7 +876,7 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS, "Must alias set should have at least one pointer element in it!"); Value *SomePtr = AS.begin()->getValue(); - BasicBlock * Preheader = CurLoop->getLoopPreheader(); + BasicBlock *Preheader = CurLoop->getLoopPreheader(); // It isn't safe to promote a load/store from the loop if the load/store is // conditional. For example, turning: @@ -907,8 +908,8 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS, // since they're all must alias. bool CanSpeculateLoad = false; - SmallVector LoopUses; - SmallPtrSet PointerMustAliases; + SmallVector LoopUses; + SmallPtrSet PointerMustAliases; // We start with an alignment of one and try to find instructions that allow // us to prove better alignment. @@ -923,7 +924,7 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS, // is available. if (!HasDedicatedExits || !Preheader) return false; - + const DataLayout &MDL = Preheader->getModule()->getDataLayout(); // Check that all of the pointers in the alias set have the same type. We @@ -954,10 +955,8 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS, return Changed; if (!GuaranteedToExecute && !CanSpeculateLoad) - CanSpeculateLoad = - isSafeToExecuteUnconditionally(*Load, DT, TLI, CurLoop, - SafetyInfo, - Preheader->getTerminator()); + CanSpeculateLoad = isSafeToExecuteUnconditionally( + *Load, DT, TLI, CurLoop, SafetyInfo, Preheader->getTerminator()); } else if (const StoreInst *Store = dyn_cast(UI)) { // Stores *of* the pointer are not interesting, only stores *to* the // pointer. @@ -983,16 +982,13 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS, } if (!GuaranteedToExecute) - GuaranteedToExecute = isGuaranteedToExecute(*UI, DT, - CurLoop, SafetyInfo); - + GuaranteedToExecute = + isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo); if (!GuaranteedToExecute && !CanSpeculateLoad) { - CanSpeculateLoad = - isDereferenceableAndAlignedPointer(Store->getPointerOperand(), - Store->getAlignment(), MDL, - Preheader->getTerminator(), - DT, TLI); + CanSpeculateLoad = isDereferenceableAndAlignedPointer( + Store->getPointerOperand(), Store->getAlignment(), MDL, + Preheader->getTerminator(), DT, TLI); } } else return Changed; // Not a load or store. @@ -1014,10 +1010,10 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS, if (!PromotionIsLegal && CanSpeculateLoad) { // If this is a thread local location, then we can insert stores along // paths which originally didn't have them without violating the memory - // model. + // model. Value *Object = GetUnderlyingObject(SomePtr, MDL); - PromotionIsLegal = isAllocLikeFn(Object, TLI) && - !PointerMayBeCaptured(Object, true, true); + PromotionIsLegal = + isAllocLikeFn(Object, TLI) && !PointerMayBeCaptured(Object, true, true); } if (!PromotionIsLegal) return Changed; @@ -1038,7 +1034,8 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS, return Changed; // Otherwise, this is safe to promote, lets do it! - DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n'); + DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr + << '\n'); Changed = true; ++NumPromoted; @@ -1049,20 +1046,19 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS, DebugLoc DL = LoopUses[0]->getDebugLoc(); // We use the SSAUpdater interface to insert phi nodes as required. - SmallVector NewPHIs; + SmallVector NewPHIs; SSAUpdater SSA(&NewPHIs); - LoopPromoter Promoter(SomePtr, LoopUses, SSA, - PointerMustAliases, ExitBlocks, + LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, InsertPts, PIC, *CurAST, *LI, DL, Alignment, AATags); // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. - LoadInst *PreheaderLoad = - new LoadInst(SomePtr, SomePtr->getName()+".promoted", - Preheader->getTerminator()); + LoadInst *PreheaderLoad = new LoadInst( + SomePtr, SomePtr->getName() + ".promoted", Preheader->getTerminator()); PreheaderLoad->setAlignment(Alignment); PreheaderLoad->setDebugLoc(DL); - if (AATags) PreheaderLoad->setAAMetadata(AATags); + if (AATags) + PreheaderLoad->setAAMetadata(AATags); SSA.AddAvailableValue(Preheader, PreheaderLoad); // Rewrite all the loads in the loop and remember all the definitions from @@ -1157,12 +1153,11 @@ void LICM::deleteAnalysisLoop(Loop *L) { LoopToAliasSetMap.erase(L); } - /// Return true if the body of this loop may store into the memory /// location pointed to by V. /// static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, - const AAMDNodes &AAInfo, + const AAMDNodes &AAInfo, AliasSetTracker *CurAST) { // Check to see if any of the basic blocks in CurLoop invalidate *V. return CurAST->getAliasSetForPointer(V, Size, AAInfo).isMod(); @@ -1175,4 +1170,3 @@ static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) { assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop"); return LI->getLoopFor(BB) != CurLoop; } - diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 685ce9e5e8e..2e5c55e73a4 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -12,11 +12,10 @@ // counts of loops easily. //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SetVector.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/LoopUnrollAnalyzer.h" @@ -32,6 +31,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/UnrollLoop.h" #include @@ -60,34 +60,34 @@ static cl::opt UnrollMaxIterationsCountToAnalyze( cl::desc("Don't allow loop unrolling to simulate more than this number of" "iterations when checking full unroll profitability")); -static cl::opt -UnrollCount("unroll-count", cl::Hidden, - cl::desc("Use this unroll count for all loops including those with " - "unroll_count pragma values, for testing purposes")); +static cl::opt UnrollCount( + "unroll-count", cl::Hidden, + cl::desc("Use this unroll count for all loops including those with " + "unroll_count pragma values, for testing purposes")); -static cl::opt -UnrollMaxCount("unroll-max-count", cl::Hidden, - cl::desc("Set the max unroll count for partial and runtime unrolling, for" - "testing purposes")); +static cl::opt UnrollMaxCount( + "unroll-max-count", cl::Hidden, + cl::desc("Set the max unroll count for partial and runtime unrolling, for" + "testing purposes")); -static cl::opt -UnrollFullMaxCount("unroll-full-max-count", cl::Hidden, - cl::desc("Set the max unroll count for full unrolling, for testing purposes")); +static cl::opt UnrollFullMaxCount( + "unroll-full-max-count", cl::Hidden, + cl::desc( + "Set the max unroll count for full unrolling, for testing purposes")); static cl::opt -UnrollAllowPartial("unroll-allow-partial", cl::Hidden, - cl::desc("Allows loops to be partially unrolled until " - "-unroll-threshold loop size is reached.")); + UnrollAllowPartial("unroll-allow-partial", cl::Hidden, + cl::desc("Allows loops to be partially unrolled until " + "-unroll-threshold loop size is reached.")); static cl::opt -UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden, - cl::desc("Unroll loops with run-time trip counts")); - -static cl::opt -PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden, - cl::desc("Unrolled size limit for loops with an unroll(full) or " - "unroll_count pragma.")); + UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden, + cl::desc("Unroll loops with run-time trip counts")); +static cl::opt PragmaUnrollThreshold( + "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden, + cl::desc("Unrolled size limit for loops with an unroll(full) or " + "unroll_count pragma.")); /// A magic value for use with the Threshold parameter to indicate /// that the loop unroll should be performed regardless of how much @@ -453,7 +453,8 @@ static unsigned UnrollCountPragmaValue(const Loop *L) { // unrolling pass is run more than once (which it generally is). static void SetLoopAlreadyUnrolled(Loop *L) { MDNode *LoopID = L->getLoopID(); - if (!LoopID) return; + if (!LoopID) + return; // First remove any existing loop unrolling metadata. SmallVector MDs; @@ -514,9 +515,9 @@ static bool canUnrollCompletely(Loop *L, unsigned Threshold, (int64_t)UnrolledCost - (int64_t)DynamicCostSavingsDiscount <= (int64_t)Threshold) { DEBUG(dbgs() << " Can fully unroll, because unrolling will reduce the " - "expected dynamic cost by " << PercentDynamicCostSaved - << "% (threshold: " << PercentDynamicCostSavedThreshold - << "%)\n" + "expected dynamic cost by " + << PercentDynamicCostSaved << "% (threshold: " + << PercentDynamicCostSavedThreshold << "%)\n" << " and the unrolled cost (" << UnrolledCost << ") is less than the max threshold (" << DynamicCostSavingsDiscount << ").\n"); @@ -544,7 +545,7 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, Optional ProvidedRuntime) { BasicBlock *Header = L->getHeader(); DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName() - << "] Loop %" << Header->getName() << "\n"); + << "] Loop %" << Header->getName() << "\n"); if (HasUnrollDisablePragma(L)) { return false; @@ -592,7 +593,7 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, // When computing the unrolled size, note that the conditional branch on the // backedge and the comparison feeding it are not replicated like the rest of // the loop body (which is why 2 is subtracted). - uint64_t UnrolledSize = (uint64_t)(LoopSize-2) * Count + 2; + uint64_t UnrolledSize = (uint64_t)(LoopSize - 2) * Count + 2; if (NotDuplicatable) { DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable" << " instructions.\n"); @@ -680,7 +681,7 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, // the original count which satisfies the threshold limit. while (Count != 0 && UnrolledSize > UP.PartialThreshold) { Count >>= 1; - UnrolledSize = (LoopSize-2) * Count + 2; + UnrolledSize = (LoopSize - 2) * Count + 2; } if (Count > UP.MaxCount) diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index d1afed0fb58..981324a01e5 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -46,7 +46,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Vectorize.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/MapVector.h" @@ -57,9 +56,9 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/DemandedBits.h" @@ -73,6 +72,7 @@ #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" @@ -98,9 +98,9 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/LoopVersioning.h" -#include "llvm/Analysis/VectorUtils.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/LoopVersioning.h" +#include "llvm/Transforms/Vectorize.h" #include #include #include @@ -116,16 +116,15 @@ STATISTIC(LoopsVectorized, "Number of loops vectorized"); STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization"); static cl::opt -EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, - cl::desc("Enable if-conversion during vectorization.")); + EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, + cl::desc("Enable if-conversion during vectorization.")); /// We don't vectorize loops with a known constant trip count below this number. -static cl::opt -TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16), - cl::Hidden, - cl::desc("Don't vectorize loops with a constant " - "trip count that is smaller than this " - "value.")); +static cl::opt TinyTripCountVectorThreshold( + "vectorizer-min-trip-count", cl::init(16), cl::Hidden, + cl::desc("Don't vectorize loops with a constant " + "trip count that is smaller than this " + "value.")); static cl::opt MaximizeBandwidth( "vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, @@ -263,7 +262,7 @@ public: /// A helper function for converting Scalar types to vector types. /// If the incoming type is void, we return void. If the VF is 1, we return /// the scalar type. -static Type* ToVectorTy(Type *Scalar, unsigned VF) { +static Type *ToVectorTy(Type *Scalar, unsigned VF) { if (Scalar->isVoidTy() || VF == 1) return Scalar; return VectorType::get(Scalar, VF); @@ -327,7 +326,7 @@ public: // can be validly truncated to. The cost model has assumed this truncation // will happen when vectorizing. void vectorize(LoopVectorizationLegality *L, - MapVector MinimumBitWidths) { + MapVector MinimumBitWidths) { MinBWs = MinimumBitWidths; Legal = L; // Create a new empty loop. Unlink the old loop and connect the new one. @@ -338,24 +337,22 @@ public: } // Return true if any runtime check is added. - bool IsSafetyChecksAdded() { - return AddedSafetyChecks; - } + bool IsSafetyChecksAdded() { return AddedSafetyChecks; } virtual ~InnerLoopVectorizer() {} protected: /// A small list of PHINodes. - typedef SmallVector PhiVector; + typedef SmallVector PhiVector; /// When we unroll loops we have multiple vector values for each scalar. /// This data structure holds the unrolled and vectorized values that /// originated from one scalar instruction. - typedef SmallVector VectorParts; + typedef SmallVector VectorParts; // When we if-convert we need to create edge masks. We have to cache values // so that we don't end up with exponential recursion/IR. - typedef DenseMap, - VectorParts> EdgeMaskCache; + typedef DenseMap, VectorParts> + EdgeMaskCache; /// Create an empty loop, based on the loop ranges of the old loop. void createEmptyLoop(); @@ -392,8 +389,8 @@ protected: /// Vectorize a single PHINode in a block. This method handles the induction /// variable canonicalization. It supports both VF = 1 for unrolled loops and /// arbitrary length vectors. - void widenPHIInstruction(Instruction *PN, VectorParts &Entry, - unsigned UF, unsigned VF, PhiVector *PV); + void widenPHIInstruction(Instruction *PN, VectorParts &Entry, unsigned UF, + unsigned VF, PhiVector *PV); /// Insert the new loop to the loop hierarchy and pass manager /// and update the analysis passes. @@ -404,7 +401,7 @@ protected: /// scalarized instruction behind an if block predicated on the control /// dependence of the instruction. virtual void scalarizeInstruction(Instruction *Instr, - bool IfPredicateStore=false); + bool IfPredicateStore = false); /// Vectorize Load and Store instructions, virtual void vectorizeMemoryInstruction(Instruction *Instr); @@ -554,11 +551,11 @@ protected: BasicBlock *LoopScalarPreHeader; /// Middle Block between the vector and the scalar. BasicBlock *LoopMiddleBlock; - ///The ExitBlock of the scalar loop. + /// The ExitBlock of the scalar loop. BasicBlock *LoopExitBlock; - ///The vector loop body. + /// The vector loop body. SmallVector LoopVectorBody; - ///The scalar loop body. + /// The scalar loop body. BasicBlock *LoopScalarBody; /// A list of all bypass blocks. The first block is the entry of the loop. SmallVector LoopBypassBlocks; @@ -571,7 +568,7 @@ protected: ValueMap WidenMap; /// Store instructions that should be predicated, as a pair /// - SmallVector, 4> PredicatedStores; + SmallVector, 4> PredicatedStores; EdgeMaskCache MaskCache; /// Trip count of the original loop. Value *TripCount; @@ -581,7 +578,7 @@ protected: /// Map of scalar integer values to the smallest bitwidth they can be legally /// represented as. The vector equivalents of these values should be truncated /// to this type. - MapVector MinBWs; + MapVector MinBWs; LoopVectorizationLegality *Legal; // Record whether runtime check is added. @@ -665,10 +662,8 @@ static void propagateMetadata(Instruction *To, const Instruction *From) { // on the condition, and thus actually aliased with some other // non-speculated memory access when the condition was false, this would be // caught by the runtime overlap checks). - if (Kind != LLVMContext::MD_tbaa && - Kind != LLVMContext::MD_alias_scope && - Kind != LLVMContext::MD_noalias && - Kind != LLVMContext::MD_fpmath && + if (Kind != LLVMContext::MD_tbaa && Kind != LLVMContext::MD_alias_scope && + Kind != LLVMContext::MD_noalias && Kind != LLVMContext::MD_fpmath && Kind != LLVMContext::MD_nontemporal) continue; @@ -934,20 +929,16 @@ private: /// for example 'force', means a decision has been made. So, we need to be /// careful NOT to add them if the user hasn't specifically asked so. class LoopVectorizeHints { - enum HintKind { - HK_WIDTH, - HK_UNROLL, - HK_FORCE - }; + enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE }; /// Hint - associates name and validation with the hint value. struct Hint { - const char * Name; + const char *Name; unsigned Value; // This may have to change for non-numeric values. HintKind Kind; - Hint(const char * Name, unsigned Value, HintKind Kind) - : Name(Name), Value(Value), Kind(Kind) { } + Hint(const char *Name, unsigned Value, HintKind Kind) + : Name(Name), Value(Value), Kind(Kind) {} bool validate(unsigned Val) { switch (Kind) { @@ -1094,9 +1085,7 @@ public: return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe; } - void setPotentiallyUnsafe() { - PotentiallyUnsafe = true; - } + void setPotentiallyUnsafe() { PotentiallyUnsafe = true; } private: /// Find hints specified in the loop metadata and update local values. @@ -1143,7 +1132,8 @@ private: Name = Name.substr(Prefix().size(), StringRef::npos); const ConstantInt *C = mdconst::dyn_extract(Arg); - if (!C) return; + if (!C) + return; unsigned Val = C->getZExtValue(); Hint *Hints[] = {&Width, &Interleave, &Force}; @@ -1169,7 +1159,7 @@ private: /// Matches metadata with hint name. bool matchesHintMetadataName(MDNode *Node, ArrayRef HintTypes) { - MDString* Name = dyn_cast(Node->getOperand(0)); + MDString *Name = dyn_cast(Node->getOperand(0)); if (!Name) return false; @@ -1271,7 +1261,7 @@ public: /// InductionList saves induction variables and maps them to the /// induction descriptor. - typedef MapVector InductionList; + typedef MapVector InductionList; /// RecurrenceSet contains the phi nodes that are recurrences other than /// inductions and reductions. @@ -1324,16 +1314,14 @@ public: bool isUniform(Value *V); /// Returns true if this instruction will remain scalar after vectorization. - bool isUniformAfterVectorization(Instruction* I) { return Uniforms.count(I); } + bool isUniformAfterVectorization(Instruction *I) { return Uniforms.count(I); } /// Returns the information that we collected about runtime memory check. const RuntimePointerChecking *getRuntimePointerChecking() const { return LAI->getRuntimePointerChecking(); } - const LoopAccessInfo *getLAI() const { - return LAI; - } + const LoopAccessInfo *getLAI() const { return LAI; } /// \brief Check if \p Instr belongs to any interleaved access group. bool isAccessInterleaved(Instruction *Instr) { @@ -1383,18 +1371,11 @@ public: /// Returns true if vector representation of the instruction \p I /// requires mask. - bool isMaskRequired(const Instruction* I) { - return (MaskedOp.count(I) != 0); - } - unsigned getNumStores() const { - return LAI->getNumStores(); - } - unsigned getNumLoads() const { - return LAI->getNumLoads(); - } - unsigned getNumPredStores() const { - return NumPredStores; - } + bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); } + unsigned getNumStores() const { return LAI->getNumStores(); } + unsigned getNumLoads() const { return LAI->getNumLoads(); } + unsigned getNumPredStores() const { return NumPredStores; } + private: /// Check if a single basic block loop is vectorizable. /// At this point we know that this is a loop with a constant trip count @@ -1479,10 +1460,10 @@ private: /// Allowed outside users. This holds the reduction /// vars which can be accessed from outside the loop. - SmallPtrSet AllowedExit; + SmallPtrSet AllowedExit; /// This set holds the variables which are known to be uniform after /// vectorization. - SmallPtrSet Uniforms; + SmallPtrSet Uniforms; /// Can we assume the absence of NaNs. bool HasFunNoNaNAttr; @@ -1513,9 +1494,9 @@ public: LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI, LoopVectorizationLegality *Legal, const TargetTransformInfo &TTI, - const TargetLibraryInfo *TLI, - DemandedBits *DB, AssumptionCache *AC, - const Function *F, const LoopVectorizeHints *Hints, + const TargetLibraryInfo *TLI, DemandedBits *DB, + AssumptionCache *AC, const Function *F, + const LoopVectorizeHints *Hints, SmallPtrSetImpl &ValuesToIgnore) : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB), TheFunction(F), Hints(Hints), ValuesToIgnore(ValuesToIgnore) {} @@ -1523,7 +1504,7 @@ public: /// Information about vectorization costs struct VectorizationFactor { unsigned Width; // Vector width with best cost - unsigned Cost; // Cost of the loop with that width + unsigned Cost; // Cost of the loop with that width }; /// \return The most profitable vectorization factor and the cost of that VF. /// This method checks every power of two up to VF. If UserVF is not ZERO @@ -1567,8 +1548,10 @@ public: private: /// The vectorization cost is a combination of the cost itself and a boolean - /// indicating whether any of the contributing operations will actually operate on - /// vector values after type legalization in the backend. If this latter value is + /// indicating whether any of the contributing operations will actually + /// operate on + /// vector values after type legalization in the backend. If this latter value + /// is /// false, then all operations will be scalarized (i.e. no vectorization has /// actually taken place). typedef std::pair VectorizationCostTy; @@ -1603,7 +1586,7 @@ public: /// Map of scalar integer values to the smallest bitwidth they can be legally /// represented as. The vector equivalents of these values should be truncated /// to this type. - MapVector MinBWs; + MapVector MinBWs; /// The loop that we evaluate. Loop *TheLoop; @@ -1699,9 +1682,8 @@ struct LoopVectorize : public FunctionPass { static char ID; explicit LoopVectorize(bool NoUnrolling = false, bool AlwaysVectorize = true) - : FunctionPass(ID), - DisableUnrolling(NoUnrolling), - AlwaysVectorize(AlwaysVectorize) { + : FunctionPass(ID), DisableUnrolling(NoUnrolling), + AlwaysVectorize(AlwaysVectorize) { initializeLoopVectorizePass(*PassRegistry::getPassRegistry()); } @@ -1823,7 +1805,8 @@ struct LoopVectorize : public FunctionPass { ? "disabled" : (Hints.getForce() == LoopVectorizeHints::FK_Enabled ? "enabled" - : "?")) << " width=" << Hints.getWidth() + : "?")) + << " width=" << Hints.getWidth() << " unroll=" << Hints.getInterleave() << "\n"); // Function containing loop @@ -1887,8 +1870,8 @@ struct LoopVectorize : public FunctionPass { // Check the function attributes to find out if this function should be // optimized for size. - bool OptForSize = Hints.getForce() != LoopVectorizeHints::FK_Enabled && - F->optForSize(); + bool OptForSize = + Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize(); // Compute the weighted frequency of this loop being executed and see if it // is less than 20% of the function entry baseline frequency. Note that we @@ -1908,7 +1891,7 @@ struct LoopVectorize : public FunctionPass { // vector instructions? if (F->hasFnAttribute(Attribute::NoImplicitFloat)) { DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat" - "attribute is used.\n"); + "attribute is used.\n"); emitAnalysisDiag( F, L, Hints, VectorizationReport() @@ -1924,10 +1907,9 @@ struct LoopVectorize : public FunctionPass { if (Hints.isPotentiallyUnsafe() && TTI->isFPVectorizationPotentiallyUnsafe()) { DEBUG(dbgs() << "LV: Potentially unsafe FP op prevents vectorization.\n"); - emitAnalysisDiag( - F, L, Hints, - VectorizationReport() - << "loop not vectorized due to unsafe FP support."); + emitAnalysisDiag(F, L, Hints, + VectorizationReport() + << "loop not vectorized due to unsafe FP support."); emitMissedWarning(F, L, Hints); return false; } @@ -2058,7 +2040,6 @@ struct LoopVectorize : public FunctionPass { AU.addPreserved(); AU.addPreserved(); } - }; } // end anonymous namespace @@ -2071,9 +2052,9 @@ struct LoopVectorize : public FunctionPass { Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) { // We need to place the broadcast of invariant variables outside the loop. Instruction *Instr = dyn_cast(V); - bool NewInstr = - (Instr && std::find(LoopVectorBody.begin(), LoopVectorBody.end(), - Instr->getParent()) != LoopVectorBody.end()); + bool NewInstr = (Instr && + std::find(LoopVectorBody.begin(), LoopVectorBody.end(), + Instr->getParent()) != LoopVectorBody.end()); bool Invariant = OrigLoop->isLoopInvariant(V) && !NewInstr; // Place the code for broadcasting invariant variables in the new preheader. @@ -2098,7 +2079,7 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, Type *ITy = Val->getType()->getScalarType(); VectorType *Ty = cast(Val->getType()); int VLen = Ty->getNumElements(); - SmallVector Indices; + SmallVector Indices; // Create a vector of consecutive numbers from zero to VF. for (int i = 0; i < VLen; ++i) @@ -2204,7 +2185,7 @@ bool LoopVectorizationLegality::isUniform(Value *V) { return LAI->isUniform(V); } -InnerLoopVectorizer::VectorParts& +InnerLoopVectorizer::VectorParts & InnerLoopVectorizer::getVectorValue(Value *V) { assert(V != Induction && "The new induction variable should not be used."); assert(!V->getType()->isVectorTy() && "Can't widen a vector"); @@ -2225,7 +2206,7 @@ InnerLoopVectorizer::getVectorValue(Value *V) { Value *InnerLoopVectorizer::reverseVector(Value *Vec) { assert(Vec->getType()->isVectorTy() && "Invalid type"); - SmallVector ShuffleMask; + SmallVector ShuffleMask; for (unsigned i = 0; i < VF; ++i) ShuffleMask.push_back(Builder.getInt32(VF - i - 1)); @@ -2518,10 +2499,10 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { // scalarize the instruction. int ConsecutiveStride = Legal->isConsecutivePtr(Ptr); bool Reverse = ConsecutiveStride < 0; - bool CreateGatherScatter = !ConsecutiveStride && - ((LI && Legal->isLegalMaskedGather(ScalarDataTy)) || - (SI && Legal->isLegalMaskedScatter(ScalarDataTy))); - + bool CreateGatherScatter = + !ConsecutiveStride && ((LI && Legal->isLegalMaskedGather(ScalarDataTy)) || + (SI && Legal->isLegalMaskedScatter(ScalarDataTy))); + if (!ConsecutiveStride && !CreateGatherScatter) return scalarizeInstruction(Instr); @@ -2534,83 +2515,83 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { if (ConsecutiveStride) { if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) { setDebugLocFromInst(Builder, Gep); - Value *PtrOperand = Gep->getPointerOperand(); - Value *FirstBasePtr = getVectorValue(PtrOperand)[0]; - FirstBasePtr = Builder.CreateExtractElement(FirstBasePtr, Zero); - - // Create the new GEP with the new induction variable. - GetElementPtrInst *Gep2 = cast(Gep->clone()); - Gep2->setOperand(0, FirstBasePtr); - Gep2->setName("gep.indvar.base"); - Ptr = Builder.Insert(Gep2); - } else if (Gep) { - setDebugLocFromInst(Builder, Gep); - assert(PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getPointerOperand()), - OrigLoop) && - "Base ptr must be invariant"); - // The last index does not have to be the induction. It can be - // consecutive and be a function of the index. For example A[I+1]; - unsigned NumOperands = Gep->getNumOperands(); - unsigned InductionOperand = getGEPInductionOperand(Gep); - // Create the new GEP with the new induction variable. - GetElementPtrInst *Gep2 = cast(Gep->clone()); - - for (unsigned i = 0; i < NumOperands; ++i) { - Value *GepOperand = Gep->getOperand(i); - Instruction *GepOperandInst = dyn_cast(GepOperand); - - // Update last index or loop invariant instruction anchored in loop. - if (i == InductionOperand || - (GepOperandInst && OrigLoop->contains(GepOperandInst))) { - assert((i == InductionOperand || - PSE.getSE()->isLoopInvariant(PSE.getSCEV(GepOperandInst), - OrigLoop)) && - "Must be last index or loop invariant"); - - VectorParts &GEPParts = getVectorValue(GepOperand); - Value *Index = GEPParts[0]; - Index = Builder.CreateExtractElement(Index, Zero); - Gep2->setOperand(i, Index); - Gep2->setName("gep.indvar.idx"); - } + Value *PtrOperand = Gep->getPointerOperand(); + Value *FirstBasePtr = getVectorValue(PtrOperand)[0]; + FirstBasePtr = Builder.CreateExtractElement(FirstBasePtr, Zero); + + // Create the new GEP with the new induction variable. + GetElementPtrInst *Gep2 = cast(Gep->clone()); + Gep2->setOperand(0, FirstBasePtr); + Gep2->setName("gep.indvar.base"); + Ptr = Builder.Insert(Gep2); + } else if (Gep) { + setDebugLocFromInst(Builder, Gep); + assert(PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getPointerOperand()), + OrigLoop) && + "Base ptr must be invariant"); + // The last index does not have to be the induction. It can be + // consecutive and be a function of the index. For example A[I+1]; + unsigned NumOperands = Gep->getNumOperands(); + unsigned InductionOperand = getGEPInductionOperand(Gep); + // Create the new GEP with the new induction variable. + GetElementPtrInst *Gep2 = cast(Gep->clone()); + + for (unsigned i = 0; i < NumOperands; ++i) { + Value *GepOperand = Gep->getOperand(i); + Instruction *GepOperandInst = dyn_cast(GepOperand); + + // Update last index or loop invariant instruction anchored in loop. + if (i == InductionOperand || + (GepOperandInst && OrigLoop->contains(GepOperandInst))) { + assert((i == InductionOperand || + PSE.getSE()->isLoopInvariant(PSE.getSCEV(GepOperandInst), + OrigLoop)) && + "Must be last index or loop invariant"); + + VectorParts &GEPParts = getVectorValue(GepOperand); + Value *Index = GEPParts[0]; + Index = Builder.CreateExtractElement(Index, Zero); + Gep2->setOperand(i, Index); + Gep2->setName("gep.indvar.idx"); } - Ptr = Builder.Insert(Gep2); - } else { // No GEP - // Use the induction element ptr. - assert(isa(Ptr) && "Invalid induction ptr"); - setDebugLocFromInst(Builder, Ptr); - VectorParts &PtrVal = getVectorValue(Ptr); - Ptr = Builder.CreateExtractElement(PtrVal[0], Zero); } - } else { - // At this point we should vector version of GEP for Gather or Scatter - assert(CreateGatherScatter && "The instruction should be scalarized"); - if (Gep) { - SmallVector OpsV; - // Vectorizing GEP, across UF parts, we want to keep each loop-invariant - // base or index of GEP scalar - for (Value *Op : Gep->operands()) { - if (PSE.getSE()->isLoopInvariant(PSE.getSCEV(Op), OrigLoop)) - OpsV.push_back(VectorParts(UF, Op)); - else - OpsV.push_back(getVectorValue(Op)); - } - - for (unsigned Part = 0; Part < UF; ++Part) { - SmallVector Ops; - Value *GEPBasePtr = OpsV[0][Part]; - for (unsigned i = 1; i < Gep->getNumOperands(); i++) - Ops.push_back(OpsV[i][Part]); - Value *NewGep = Builder.CreateGEP(nullptr, GEPBasePtr, Ops, - "VectorGep"); - assert(NewGep->getType()->isVectorTy() && "Expected vector GEP"); - NewGep = Builder.CreateBitCast(NewGep, - VectorType::get(Ptr->getType(), VF)); - VectorGep.push_back(NewGep); - } - } else - VectorGep = getVectorValue(Ptr); + Ptr = Builder.Insert(Gep2); + } else { // No GEP + // Use the induction element ptr. + assert(isa(Ptr) && "Invalid induction ptr"); + setDebugLocFromInst(Builder, Ptr); + VectorParts &PtrVal = getVectorValue(Ptr); + Ptr = Builder.CreateExtractElement(PtrVal[0], Zero); } + } else { + // At this point we should vector version of GEP for Gather or Scatter + assert(CreateGatherScatter && "The instruction should be scalarized"); + if (Gep) { + SmallVector OpsV; + // Vectorizing GEP, across UF parts, we want to keep each loop-invariant + // base or index of GEP scalar + for (Value *Op : Gep->operands()) { + if (PSE.getSE()->isLoopInvariant(PSE.getSCEV(Op), OrigLoop)) + OpsV.push_back(VectorParts(UF, Op)); + else + OpsV.push_back(getVectorValue(Op)); + } + + for (unsigned Part = 0; Part < UF; ++Part) { + SmallVector Ops; + Value *GEPBasePtr = OpsV[0][Part]; + for (unsigned i = 1; i < Gep->getNumOperands(); i++) + Ops.push_back(OpsV[i][Part]); + Value *NewGep = + Builder.CreateGEP(nullptr, GEPBasePtr, Ops, "VectorGep"); + assert(NewGep->getType()->isVectorTy() && "Expected vector GEP"); + NewGep = + Builder.CreateBitCast(NewGep, VectorType::get(Ptr->getType(), VF)); + VectorGep.push_back(NewGep); + } + } else + VectorGep = getVectorValue(Ptr); + } VectorParts Mask = createBlockInMask(Instr->getParent()); // Handle Stores: @@ -2631,7 +2612,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { } else { // Calculate the pointer for the specific unroll-part. Value *PartPtr = - Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF)); + Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF)); if (Reverse) { // If we store to reverse consecutive memory locations, then we need @@ -2639,20 +2620,22 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { StoredVal[Part] = reverseVector(StoredVal[Part]); // If the address is consecutive but reversed, then the // wide store needs to start at the last vector element. - PartPtr = Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF)); - PartPtr = Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF)); + PartPtr = + Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF)); + PartPtr = + Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF)); Mask[Part] = reverseVector(Mask[Part]); } - Value *VecPtr = Builder.CreateBitCast(PartPtr, - DataTy->getPointerTo(AddressSpace)); + Value *VecPtr = + Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace)); if (Legal->isMaskRequired(SI)) NewSI = Builder.CreateMaskedStore(StoredVal[Part], VecPtr, Alignment, Mask[Part]); - else - NewSI = Builder.CreateAlignedStore(StoredVal[Part], VecPtr, - Alignment); + else + NewSI = + Builder.CreateAlignedStore(StoredVal[Part], VecPtr, Alignment); } addMetadata(NewSI, SI); } @@ -2663,16 +2646,16 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { assert(LI && "Must have a load instruction"); setDebugLocFromInst(Builder, LI); for (unsigned Part = 0; Part < UF; ++Part) { - Instruction* NewLI; + Instruction *NewLI; if (CreateGatherScatter) { Value *MaskPart = Legal->isMaskRequired(LI) ? Mask[Part] : nullptr; - NewLI = Builder.CreateMaskedGather(VectorGep[Part], Alignment, - MaskPart, 0, "wide.masked.gather"); + NewLI = Builder.CreateMaskedGather(VectorGep[Part], Alignment, MaskPart, + 0, "wide.masked.gather"); Entry[Part] = NewLI; } else { // Calculate the pointer for the specific unroll-part. Value *PartPtr = - Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF)); + Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF)); if (Reverse) { // If the address is consecutive but reversed, then the @@ -2682,15 +2665,15 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { Mask[Part] = reverseVector(Mask[Part]); } - Value *VecPtr = Builder.CreateBitCast(PartPtr, - DataTy->getPointerTo(AddressSpace)); + Value *VecPtr = + Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace)); if (Legal->isMaskRequired(LI)) NewLI = Builder.CreateMaskedLoad(VecPtr, Alignment, Mask[Part], UndefValue::get(DataTy), "wide.masked.load"); else NewLI = Builder.CreateAlignedLoad(VecPtr, Alignment, "wide.load"); - Entry[Part] = Reverse ? reverseVector(NewLI) : NewLI; + Entry[Part] = Reverse ? reverseVector(NewLI) : NewLI; } addMetadata(NewLI, LI); } @@ -2737,8 +2720,9 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, // Does this instruction return a value ? bool IsVoidRetTy = Instr->getType()->isVoidTy(); - Value *UndefVec = IsVoidRetTy ? nullptr : - UndefValue::get(VectorType::get(Instr->getType(), VF)); + Value *UndefVec = + IsVoidRetTy ? nullptr + : UndefValue::get(VectorType::get(Instr->getType(), VF)); // Create a new entry in the WidenMap and initialize it to Undef or Null. VectorParts &VecResults = WidenMap.splat(Instr, UndefVec); @@ -2791,8 +2775,8 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, Builder.getInt32(Width)); // End if-block. if (IfPredicateStore) - PredicatedStores.push_back(std::make_pair(cast(Cloned), - Cmp)); + PredicatedStores.push_back( + std::make_pair(cast(Cloned), Cmp)); } } } @@ -2866,9 +2850,8 @@ Value *InnerLoopVectorizer::getOrCreateTripCount(Loop *L) { if (TripCount->getType()->isPointerTy()) TripCount = - CastInst::CreatePointerCast(TripCount, IdxTy, - "exitcount.ptrcnt.to.int", - L->getLoopPreheader()->getTerminator()); + CastInst::CreatePointerCast(TripCount, IdxTy, "exitcount.ptrcnt.to.int", + L->getLoopPreheader()->getTerminator()); return TripCount; } @@ -2913,13 +2896,11 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L, // Generate code to check that the loop's trip count that we computed by // adding one to the backedge-taken count will not overflow. - Value *CheckMinIters = - Builder.CreateICmpULT(Count, - ConstantInt::get(Count->getType(), VF * UF), - "min.iters.check"); + Value *CheckMinIters = Builder.CreateICmpULT( + Count, ConstantInt::get(Count->getType(), VF * UF), "min.iters.check"); - BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(), - "min.iters.checked"); + BasicBlock *NewBB = + BB->splitBasicBlock(BB->getTerminator(), "min.iters.checked"); // Update dominator tree immediately if the generated block is a // LoopBypassBlock because SCEV expansions to generate loop bypass // checks may query it before the current function is finished. @@ -2944,8 +2925,7 @@ void InnerLoopVectorizer::emitVectorLoopEnteredCheck(Loop *L, // Generate code to check that the loop's trip count that we computed by // adding one to the backedge-taken count will not overflow. - BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(), - "vector.ph"); + BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph"); // Update dominator tree immediately if the generated block is a // LoopBypassBlock because SCEV expansions to generate loop bypass // checks may query it before the current function is finished. @@ -2987,8 +2967,7 @@ void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) { AddedSafetyChecks = true; } -void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, - BasicBlock *Bypass) { +void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) { BasicBlock *BB = L->getLoopPreheader(); // Generate the code that checks in runtime if arrays overlap. We put the @@ -3022,7 +3001,6 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, LVer->prepareNoAliasMetadata(); } - void InnerLoopVectorizer::createEmptyLoop() { /* In this function we generate a new loop. The new loop will contain @@ -3080,12 +3058,12 @@ void InnerLoopVectorizer::createEmptyLoop() { BasicBlock *VecBody = VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body"); BasicBlock *MiddleBlock = - VecBody->splitBasicBlock(VecBody->getTerminator(), "middle.block"); + VecBody->splitBasicBlock(VecBody->getTerminator(), "middle.block"); BasicBlock *ScalarPH = - MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(), "scalar.ph"); + MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(), "scalar.ph"); // Create and register the new vector loop. - Loop* Lp = new Loop(); + Loop *Lp = new Loop(); Loop *ParentLoop = OrigLoop->getParentLoop(); // Insert the new loop into the loop nest and register the new basic blocks @@ -3127,8 +3105,8 @@ void InnerLoopVectorizer::createEmptyLoop() { Value *CountRoundDown = getOrCreateVectorTripCount(Lp); Constant *Step = ConstantInt::get(IdxTy, VF * UF); Induction = - createInductionVariable(Lp, StartIdx, CountRoundDown, Step, - getDebugLocFromInstOrOperands(OldInduction)); + createInductionVariable(Lp, StartIdx, CountRoundDown, Step, + getDebugLocFromInstOrOperands(OldInduction)); // We are going to resume the execution of the scalar loop. // Go over all of the induction variables that we found and fix the @@ -3148,18 +3126,16 @@ void InnerLoopVectorizer::createEmptyLoop() { InductionDescriptor II = I->second; // Create phi nodes to merge from the backedge-taken check block. - PHINode *BCResumeVal = PHINode::Create(OrigPhi->getType(), 3, - "bc.resume.val", - ScalarPH->getTerminator()); + PHINode *BCResumeVal = PHINode::Create( + OrigPhi->getType(), 3, "bc.resume.val", ScalarPH->getTerminator()); Value *EndValue; if (OrigPhi == OldInduction) { // We know what the end value is. EndValue = CountRoundDown; } else { IRBuilder<> B(LoopBypassBlocks.back()->getTerminator()); - Value *CRD = B.CreateSExtOrTrunc(CountRoundDown, - II.getStepValue()->getType(), - "cast.crd"); + Value *CRD = B.CreateSExtOrTrunc( + CountRoundDown, II.getStepValue()->getType(), "cast.crd"); EndValue = II.transform(B, CRD); EndValue->setName("ind.end"); } @@ -3181,9 +3157,9 @@ void InnerLoopVectorizer::createEmptyLoop() { // Add a check in the middle block to see if we have completed // all of the iterations in the first vector loop. // If (N - N%VF) == N, then we *don't* need to run the remainder. - Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count, - CountRoundDown, "cmp.n", - MiddleBlock->getTerminator()); + Value *CmpN = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count, + CountRoundDown, "cmp.n", MiddleBlock->getTerminator()); ReplaceInstWithInst(MiddleBlock->getTerminator(), BranchInst::Create(ExitBlock, ScalarPH, CmpN)); @@ -3238,9 +3214,7 @@ struct CSEDenseMapInfo { /// = ...; " blocks. We start with one vectorized basic block. For every /// conditional block we split this vectorized block. Therefore, every second /// block will be a predicated one. -static bool isPredicatedBlock(unsigned BlockNum) { - return BlockNum % 2; -} +static bool isPredicatedBlock(unsigned BlockNum) { return BlockNum % 2; } ///\brief Perform cse of induction variable instructions. static void cse(SmallVector &BBs) { @@ -3274,7 +3248,7 @@ static void cse(SmallVector &BBs) { /// \brief Adds a 'fast' flag to floating point operations. static Value *addFastMathFlag(Value *V) { - if (isa(V)){ + if (isa(V)) { FastMathFlags Flags; Flags.setUnsafeAlgebra(); cast(V)->setFastMathFlags(Flags); @@ -3397,8 +3371,8 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() { if (I->use_empty()) continue; Type *OriginalTy = I->getType(); - Type *ScalarTruncatedTy = IntegerType::get(OriginalTy->getContext(), - KV.second); + Type *ScalarTruncatedTy = + IntegerType::get(OriginalTy->getContext(), KV.second); Type *TruncatedTy = VectorType::get(ScalarTruncatedTy, OriginalTy->getVectorNumElements()); if (TruncatedTy == OriginalTy) @@ -3408,7 +3382,7 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() { continue; IRBuilder<> B(cast(I)); - auto ShrinkOperand = [&](Value *V) -> Value* { + auto ShrinkOperand = [&](Value *V) -> Value * { if (auto *ZI = dyn_cast(V)) if (ZI->getSrcTy() == TruncatedTy) return ZI->getOperand(0); @@ -3419,44 +3393,42 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() { // unfortunately. Value *NewI = nullptr; if (BinaryOperator *BO = dyn_cast(I)) { - NewI = B.CreateBinOp(BO->getOpcode(), - ShrinkOperand(BO->getOperand(0)), + NewI = B.CreateBinOp(BO->getOpcode(), ShrinkOperand(BO->getOperand(0)), ShrinkOperand(BO->getOperand(1))); cast(NewI)->copyIRFlags(I); } else if (ICmpInst *CI = dyn_cast(I)) { - NewI = B.CreateICmp(CI->getPredicate(), - ShrinkOperand(CI->getOperand(0)), - ShrinkOperand(CI->getOperand(1))); + NewI = + B.CreateICmp(CI->getPredicate(), ShrinkOperand(CI->getOperand(0)), + ShrinkOperand(CI->getOperand(1))); } else if (SelectInst *SI = dyn_cast(I)) { NewI = B.CreateSelect(SI->getCondition(), ShrinkOperand(SI->getTrueValue()), ShrinkOperand(SI->getFalseValue())); } else if (CastInst *CI = dyn_cast(I)) { switch (CI->getOpcode()) { - default: llvm_unreachable("Unhandled cast!"); + default: + llvm_unreachable("Unhandled cast!"); case Instruction::Trunc: NewI = ShrinkOperand(CI->getOperand(0)); break; case Instruction::SExt: - NewI = B.CreateSExtOrTrunc(CI->getOperand(0), - smallestIntegerVectorType(OriginalTy, - TruncatedTy)); + NewI = B.CreateSExtOrTrunc( + CI->getOperand(0), + smallestIntegerVectorType(OriginalTy, TruncatedTy)); break; case Instruction::ZExt: - NewI = B.CreateZExtOrTrunc(CI->getOperand(0), - smallestIntegerVectorType(OriginalTy, - TruncatedTy)); + NewI = B.CreateZExtOrTrunc( + CI->getOperand(0), + smallestIntegerVectorType(OriginalTy, TruncatedTy)); break; } } else if (ShuffleVectorInst *SI = dyn_cast(I)) { auto Elements0 = SI->getOperand(0)->getType()->getVectorNumElements(); - auto *O0 = - B.CreateZExtOrTrunc(SI->getOperand(0), - VectorType::get(ScalarTruncatedTy, Elements0)); + auto *O0 = B.CreateZExtOrTrunc( + SI->getOperand(0), VectorType::get(ScalarTruncatedTy, Elements0)); auto Elements1 = SI->getOperand(1)->getType()->getVectorNumElements(); - auto *O1 = - B.CreateZExtOrTrunc(SI->getOperand(1), - VectorType::get(ScalarTruncatedTy, Elements1)); + auto *O1 = B.CreateZExtOrTrunc( + SI->getOperand(1), VectorType::get(ScalarTruncatedTy, Elements1)); NewI = B.CreateShuffleVector(O0, O1, SI->getMask()); } else if (isa(I)) { @@ -3525,8 +3497,8 @@ void InnerLoopVectorizer::vectorizeLoop() { DFS.perform(LI); // Vectorize all of the blocks in the original loop. - for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(), - be = DFS.endRPO(); bb != be; ++bb) + for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(), be = DFS.endRPO(); + bb != be; ++bb) vectorizeBlockInLoop(*bb, &PHIsToFix); // Insert truncates and extends for any truncated instructions as hints to @@ -3614,10 +3586,10 @@ void InnerLoopVectorizer::vectorizeLoop() { // Make sure to add the reduction stat value only to the // first unroll part. Value *StartVal = (part == 0) ? VectorStart : Identity; - cast(VecRdxPhi[part])->addIncoming(StartVal, - LoopVectorPreHeader); - cast(VecRdxPhi[part])->addIncoming(Val[part], - LoopVectorBody.back()); + cast(VecRdxPhi[part]) + ->addIncoming(StartVal, LoopVectorPreHeader); + cast(VecRdxPhi[part]) + ->addIncoming(Val[part], LoopVectorBody.back()); } // Before each round, move the insertion point right between @@ -3675,21 +3647,19 @@ void InnerLoopVectorizer::vectorizeLoop() { assert(isPowerOf2_32(VF) && "Reduction emission only supported for pow2 vectors!"); Value *TmpVec = ReducedPartRdx; - SmallVector ShuffleMask(VF, nullptr); + SmallVector ShuffleMask(VF, nullptr); for (unsigned i = VF; i != 1; i >>= 1) { // Move the upper half of the vector to the lower half. - for (unsigned j = 0; j != i/2; ++j) - ShuffleMask[j] = Builder.getInt32(i/2 + j); + for (unsigned j = 0; j != i / 2; ++j) + ShuffleMask[j] = Builder.getInt32(i / 2 + j); // Fill the rest of the mask with undef. - std::fill(&ShuffleMask[i/2], ShuffleMask.end(), + std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), UndefValue::get(Builder.getInt32Ty())); - Value *Shuf = - Builder.CreateShuffleVector(TmpVec, - UndefValue::get(TmpVec->getType()), - ConstantVector::get(ShuffleMask), - "rdx.shuf"); + Value *Shuf = Builder.CreateShuffleVector( + TmpVec, UndefValue::get(TmpVec->getType()), + ConstantVector::get(ShuffleMask), "rdx.shuf"); if (Op != Instruction::ICmp && Op != Instruction::FCmp) // Floating point operations had to be 'fast' to enable the reduction. @@ -3701,8 +3671,8 @@ void InnerLoopVectorizer::vectorizeLoop() { } // The result is in the first element of the vector. - ReducedPartRdx = Builder.CreateExtractElement(TmpVec, - Builder.getInt32(0)); + ReducedPartRdx = + Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); // If the reduction can be performed in a smaller type, we need to extend // the reduction to the wider type before we branch to the original loop. @@ -3726,9 +3696,11 @@ void InnerLoopVectorizer::vectorizeLoop() { // We know that the loop is in LCSSA form. We need to update the // PHI nodes in the exit blocks. for (BasicBlock::iterator LEI = LoopExitBlock->begin(), - LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) { + LEE = LoopExitBlock->end(); + LEI != LEE; ++LEI) { PHINode *LCSSAPhi = dyn_cast(LEI); - if (!LCSSAPhi) break; + if (!LCSSAPhi) + break; // All PHINodes need to have a single entry edge, or two if // we already fixed them. @@ -3741,7 +3713,7 @@ void InnerLoopVectorizer::vectorizeLoop() { LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock); break; } - }// end of the LCSSA phi scan. + } // end of the LCSSA phi scan. // Fix the scalar loop reduction variable with the incoming reduction sum // from the vector body and from the backedge value. @@ -3928,9 +3900,11 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) { void InnerLoopVectorizer::fixLCSSAPHIs() { for (BasicBlock::iterator LEI = LoopExitBlock->begin(), - LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) { + LEE = LoopExitBlock->end(); + LEI != LEE; ++LEI) { PHINode *LCSSAPhi = dyn_cast(LEI); - if (!LCSSAPhi) break; + if (!LCSSAPhi) + break; if (LCSSAPhi->getNumIncomingValues() == 1) LCSSAPhi->addIncoming(UndefValue::get(LCSSAPhi->getType()), LoopMiddleBlock); @@ -3943,7 +3917,7 @@ InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) { "Invalid edge"); // Look for cached value. - std::pair Edge(Src, Dst); + std::pair Edge(Src, Dst); EdgeMaskCache::iterator ECEntryIt = MaskCache.find(Edge); if (ECEntryIt != MaskCache.end()) return ECEntryIt->second; @@ -3999,13 +3973,13 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) { void InnerLoopVectorizer::widenPHIInstruction( Instruction *PN, InnerLoopVectorizer::VectorParts &Entry, unsigned UF, unsigned VF, PhiVector *PV) { - PHINode* P = cast(PN); + PHINode *P = cast(PN); // Handle recurrences. if (Legal->isReductionVariable(P) || Legal->isFirstOrderRecurrence(P)) { for (unsigned part = 0; part < UF; ++part) { // This is phase one of vectorizing PHIs. - Type *VecTy = (VF == 1) ? PN->getType() : - VectorType::get(PN->getType(), VF); + Type *VecTy = + (VF == 1) ? PN->getType() : VectorType::get(PN->getType(), VF); Entry[part] = PHINode::Create( VecTy, 2, "vec.phi", &*LoopVectorBody.back()->getFirstInsertionPt()); } @@ -4030,21 +4004,20 @@ void InnerLoopVectorizer::widenPHIInstruction( // SELECT(Mask2, In2, // ( ...))) for (unsigned In = 0; In < NumIncoming; In++) { - VectorParts Cond = createEdgeMask(P->getIncomingBlock(In), - P->getParent()); + VectorParts Cond = + createEdgeMask(P->getIncomingBlock(In), P->getParent()); VectorParts &In0 = getVectorValue(P->getIncomingValue(In)); for (unsigned part = 0; part < UF; ++part) { // We might have single edge PHIs (blocks) - use an identity // 'select' for the first PHI operand. if (In == 0) - Entry[part] = Builder.CreateSelect(Cond[part], In0[part], - In0[part]); + Entry[part] = Builder.CreateSelect(Cond[part], In0[part], In0[part]); else // Select between the current value and the previous incoming edge // based on the incoming mask. - Entry[part] = Builder.CreateSelect(Cond[part], In0[part], - Entry[part], "predphi"); + Entry[part] = Builder.CreateSelect(Cond[part], In0[part], Entry[part], + "predphi"); } } return; @@ -4052,67 +4025,64 @@ void InnerLoopVectorizer::widenPHIInstruction( // This PHINode must be an induction variable. // Make sure that we know about it. - assert(Legal->getInductionVars()->count(P) && - "Not an induction variable"); + assert(Legal->getInductionVars()->count(P) && "Not an induction variable"); InductionDescriptor II = Legal->getInductionVars()->lookup(P); // FIXME: The newly created binary instructions should contain nsw/nuw flags, // which can be found from the original scalar operations. switch (II.getKind()) { - case InductionDescriptor::IK_NoInduction: - llvm_unreachable("Unknown induction"); - case InductionDescriptor::IK_IntInduction: { - assert(P->getType() == II.getStartValue()->getType() && - "Types must match"); - // Handle other induction variables that are now based on the - // canonical one. - Value *V = Induction; - if (P != OldInduction) { - V = Builder.CreateSExtOrTrunc(Induction, P->getType()); - V = II.transform(Builder, V); - V->setName("offset.idx"); - } - Value *Broadcasted = getBroadcastInstrs(V); - // After broadcasting the induction variable we need to make the vector - // consecutive by adding 0, 1, 2, etc. - for (unsigned part = 0; part < UF; ++part) - Entry[part] = getStepVector(Broadcasted, VF * part, II.getStepValue()); - return; + case InductionDescriptor::IK_NoInduction: + llvm_unreachable("Unknown induction"); + case InductionDescriptor::IK_IntInduction: { + assert(P->getType() == II.getStartValue()->getType() && "Types must match"); + // Handle other induction variables that are now based on the + // canonical one. + Value *V = Induction; + if (P != OldInduction) { + V = Builder.CreateSExtOrTrunc(Induction, P->getType()); + V = II.transform(Builder, V); + V->setName("offset.idx"); } - case InductionDescriptor::IK_PtrInduction: - // Handle the pointer induction variable case. - assert(P->getType()->isPointerTy() && "Unexpected type."); - // This is the normalized GEP that starts counting at zero. - Value *PtrInd = Induction; - PtrInd = Builder.CreateSExtOrTrunc(PtrInd, II.getStepValue()->getType()); - // This is the vector of results. Notice that we don't generate - // vector geps because scalar geps result in better code. - for (unsigned part = 0; part < UF; ++part) { - if (VF == 1) { - int EltIndex = part; - Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex); - Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx); - Value *SclrGep = II.transform(Builder, GlobalIdx); - SclrGep->setName("next.gep"); - Entry[part] = SclrGep; - continue; - } + Value *Broadcasted = getBroadcastInstrs(V); + // After broadcasting the induction variable we need to make the vector + // consecutive by adding 0, 1, 2, etc. + for (unsigned part = 0; part < UF; ++part) + Entry[part] = getStepVector(Broadcasted, VF * part, II.getStepValue()); + return; + } + case InductionDescriptor::IK_PtrInduction: + // Handle the pointer induction variable case. + assert(P->getType()->isPointerTy() && "Unexpected type."); + // This is the normalized GEP that starts counting at zero. + Value *PtrInd = Induction; + PtrInd = Builder.CreateSExtOrTrunc(PtrInd, II.getStepValue()->getType()); + // This is the vector of results. Notice that we don't generate + // vector geps because scalar geps result in better code. + for (unsigned part = 0; part < UF; ++part) { + if (VF == 1) { + int EltIndex = part; + Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex); + Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx); + Value *SclrGep = II.transform(Builder, GlobalIdx); + SclrGep->setName("next.gep"); + Entry[part] = SclrGep; + continue; + } - Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF)); - for (unsigned int i = 0; i < VF; ++i) { - int EltIndex = i + part * VF; - Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex); - Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx); - Value *SclrGep = II.transform(Builder, GlobalIdx); - SclrGep->setName("next.gep"); - VecVal = Builder.CreateInsertElement(VecVal, SclrGep, - Builder.getInt32(i), - "insert.gep"); - } - Entry[part] = VecVal; + Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF)); + for (unsigned int i = 0; i < VF; ++i) { + int EltIndex = i + part * VF; + Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex); + Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx); + Value *SclrGep = II.transform(Builder, GlobalIdx); + SclrGep->setName("next.gep"); + VecVal = Builder.CreateInsertElement(VecVal, SclrGep, + Builder.getInt32(i), "insert.gep"); } - return; + Entry[part] = VecVal; + } + return; } } @@ -4130,7 +4100,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { // Vectorize PHINodes. widenPHIInstruction(&*it, Entry, UF, VF, PV); continue; - }// End of PHI. + } // End of PHI. case Instruction::Add: case Instruction::FAdd: @@ -4183,17 +4153,17 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { // We have to take the 'vectorized' value and pick the first lane. // Instcombine will make this a no-op. VectorParts &Cond = getVectorValue(it->getOperand(0)); - VectorParts &Op0 = getVectorValue(it->getOperand(1)); - VectorParts &Op1 = getVectorValue(it->getOperand(2)); + VectorParts &Op0 = getVectorValue(it->getOperand(1)); + VectorParts &Op1 = getVectorValue(it->getOperand(2)); - Value *ScalarCond = (VF == 1) ? Cond[0] : - Builder.CreateExtractElement(Cond[0], Builder.getInt32(0)); + Value *ScalarCond = + (VF == 1) + ? Cond[0] + : Builder.CreateExtractElement(Cond[0], Builder.getInt32(0)); for (unsigned Part = 0; Part < UF; ++Part) { Entry[Part] = Builder.CreateSelect( - InvariantCond ? ScalarCond : Cond[Part], - Op0[Part], - Op1[Part]); + InvariantCond ? ScalarCond : Cond[Part], Op0[Part], Op1[Part]); } addMetadata(Entry, &*it); @@ -4226,7 +4196,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { case Instruction::Store: case Instruction::Load: vectorizeMemoryInstruction(&*it); - break; + break; case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: @@ -4247,8 +4217,8 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { /// c. other casts depend on pointer size. if (CI->getOperand(0) == OldInduction && it->getOpcode() == Instruction::Trunc) { - Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction, - CI->getType()); + Value *ScalarCast = + Builder.CreateCast(CI->getOpcode(), Induction, CI->getType()); Value *Broadcasted = getBroadcastInstrs(ScalarCast); InductionDescriptor II = Legal->getInductionVars()->lookup(OldInduction); @@ -4260,8 +4230,8 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { break; } /// Vectorize casts. - Type *DestTy = (VF == 1) ? CI->getType() : - VectorType::get(CI->getType(), VF); + Type *DestTy = + (VF == 1) ? CI->getType() : VectorType::get(CI->getType(), VF); VectorParts &A = getVectorValue(it->getOperand(0)); for (unsigned Part = 0; Part < UF; ++Part) @@ -4287,9 +4257,8 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF)); Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); - if (ID && - (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end || - ID == Intrinsic::lifetime_start)) { + if (ID && (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end || + ID == Intrinsic::lifetime_start)) { scalarizeInstruction(&*it); break; } @@ -4358,8 +4327,8 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { // All other instructions are unsupported. Scalarize them. scalarizeInstruction(&*it); break; - }// end of switch. - }// end of for_each instr. + } // end of switch. + } // end of for_each instr. } void InnerLoopVectorizer::updateAnalysis() { @@ -4413,7 +4382,8 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { // Collect safe addresses. for (Loop::block_iterator BI = TheLoop->block_begin(), - BE = TheLoop->block_end(); BI != BE; ++BI) { + BE = TheLoop->block_end(); + BI != BE; ++BI) { BasicBlock *BB = *BI; if (blockNeedsPredication(BB)) @@ -4430,7 +4400,8 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { // Collect the blocks that need predication. BasicBlock *Header = TheLoop->getHeader(); for (Loop::block_iterator BI = TheLoop->block_begin(), - BE = TheLoop->block_end(); BI != BE; ++BI) { + BE = TheLoop->block_end(); + BI != BE; ++BI) { BasicBlock *BB = *BI; // We don't support switch statements inside loops. @@ -4462,9 +4433,8 @@ bool LoopVectorizationLegality::canVectorize() { // We must have a loop in canonical form. Loops with indirectbr in them cannot // be canonicalized. if (!TheLoop->getLoopPreheader()) { - emitAnalysis( - VectorizationReport() << - "loop control flow is not understood by vectorizer"); + emitAnalysis(VectorizationReport() + << "loop control flow is not understood by vectorizer"); return false; } @@ -4476,17 +4446,15 @@ bool LoopVectorizationLegality::canVectorize() { // We must have a single backedge. if (TheLoop->getNumBackEdges() != 1) { - emitAnalysis( - VectorizationReport() << - "loop control flow is not understood by vectorizer"); + emitAnalysis(VectorizationReport() + << "loop control flow is not understood by vectorizer"); return false; } // We must have a single exiting block. if (!TheLoop->getExitingBlock()) { - emitAnalysis( - VectorizationReport() << - "loop control flow is not understood by vectorizer"); + emitAnalysis(VectorizationReport() + << "loop control flow is not understood by vectorizer"); return false; } @@ -4494,15 +4462,14 @@ bool LoopVectorizationLegality::canVectorize() { // checked at the end of each iteration. With that we can assume that all // instructions in the loop are executed the same number of times. if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) { - emitAnalysis( - VectorizationReport() << - "loop control flow is not understood by vectorizer"); + emitAnalysis(VectorizationReport() + << "loop control flow is not understood by vectorizer"); return false; } // We need to have a loop header. - DEBUG(dbgs() << "LV: Found a loop: " << - TheLoop->getHeader()->getName() << '\n'); + DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName() + << '\n'); // Check if we can if-convert non-single-bb loops. unsigned NumBlocks = TheLoop->getNumBlocks(); @@ -4581,7 +4548,7 @@ static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) { return Ty; } -static Type* getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) { +static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) { Ty0 = convertPointerToIntegerType(DL, Ty0); Ty1 = convertPointerToIntegerType(DL, Ty1); if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits()) @@ -4596,7 +4563,7 @@ static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, // Reduction instructions are allowed to have exit users. All other // instructions must not have external users. if (!Reductions.count(Inst)) - //Check that all of the users of the loop are inside the BB. + // Check that all of the users of the loop are inside the BB. for (User *U : Inst->users()) { Instruction *UI = cast(U); // This user may be a reduction exit value. @@ -4619,7 +4586,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // For each block in the loop. for (Loop::block_iterator bb = TheLoop->block_begin(), - be = TheLoop->block_end(); bb != be; ++bb) { + be = TheLoop->block_end(); + bb != be; ++bb) { // Scan the instructions in the block and look for hazards. for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e; @@ -4628,8 +4596,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (PHINode *Phi = dyn_cast(it)) { Type *PhiTy = Phi->getType(); // Check that this PHI type is allowed. - if (!PhiTy->isIntegerTy() && - !PhiTy->isFloatingPointTy() && + if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() && !PhiTy->isPointerTy()) { emitAnalysis(VectorizationReport(&*it) << "loop control flow is not understood by vectorizer"); @@ -4645,9 +4612,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // identified reduction value with an outside user. if (!hasOutsideLoopUser(TheLoop, &*it, AllowedExit)) continue; - emitAnalysis(VectorizationReport(&*it) << - "value could not be identified as " - "an induction or reduction variable"); + emitAnalysis(VectorizationReport(&*it) + << "value could not be identified as " + "an induction or reduction variable"); return false; } @@ -4670,9 +4637,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Int inductions are special because we only allow one IV. if (ID.getKind() == InductionDescriptor::IK_IntInduction && - ID.getStepValue()->isOne() && - isa(ID.getStartValue()) && - cast(ID.getStartValue())->isNullValue()) { + ID.getStepValue()->isOne() && isa(ID.getStartValue()) && + cast(ID.getStartValue())->isNullValue()) { // Use the phi node with the widest type as induction. Use the last // one if there are multiple (no good reason for doing this other // than it is expedient). We've checked that it begins at zero and @@ -4686,9 +4652,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Until we explicitly handle the case of an induction variable with // an outside loop user we have to give up vectorizing this loop. if (hasOutsideLoopUser(TheLoop, &*it, AllowedExit)) { - emitAnalysis(VectorizationReport(&*it) << - "use of induction value outside of the " - "loop is not handled by vectorizer"); + emitAnalysis(VectorizationReport(&*it) + << "use of induction value outside of the " + "loop is not handled by vectorizer"); return false; } @@ -4709,19 +4675,20 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { continue; } - emitAnalysis(VectorizationReport(&*it) << - "value that could not be identified as " - "reduction is used outside the loop"); - DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n"); + emitAnalysis(VectorizationReport(&*it) + << "value that could not be identified as " + "reduction is used outside the loop"); + DEBUG(dbgs() << "LV: Found an unidentified PHI." << *Phi << "\n"); return false; - }// end of PHI handling + } // end of PHI handling // We handle calls that: // * Are debug info intrinsics. // * Have a mapping to an IR intrinsic. // * Have a vector version available. CallInst *CI = dyn_cast(it); - if (CI && !getVectorIntrinsicIDForCall(CI, TLI) && !isa(CI) && + if (CI && !getVectorIntrinsicIDForCall(CI, TLI) && + !isa(CI) && !(CI->getCalledFunction() && TLI && TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) { emitAnalysis(VectorizationReport(&*it) @@ -4732,8 +4699,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Intrinsics such as powi,cttz and ctlz are legal to vectorize if the // second argument is the same (i.e. loop invariant) - if (CI && - hasVectorInstrinsicScalarOpd(getVectorIntrinsicIDForCall(CI, TLI), 1)) { + if (CI && hasVectorInstrinsicScalarOpd( + getVectorIntrinsicIDForCall(CI, TLI), 1)) { auto *SE = PSE.getSE(); if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(1)), TheLoop)) { emitAnalysis(VectorizationReport(&*it) @@ -4746,7 +4713,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Check that the instruction return type is vectorizable. // Also, we can't vectorize extractelement instructions. if ((!VectorType::isValidElementType(it->getType()) && - !it->getType()->isVoidTy()) || isa(it)) { + !it->getType()->isVoidTy()) || + isa(it)) { emitAnalysis(VectorizationReport(&*it) << "instruction return type cannot be vectorized"); DEBUG(dbgs() << "LV: Found unvectorizable type.\n"); @@ -4757,8 +4725,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (StoreInst *ST = dyn_cast(it)) { Type *T = ST->getValueOperand()->getType(); if (!VectorType::isValidElementType(T)) { - emitAnalysis(VectorizationReport(ST) << - "store instruction cannot be vectorized"); + emitAnalysis(VectorizationReport(ST) + << "store instruction cannot be vectorized"); return false; } if (EnableMemAccessVersioning) @@ -4768,14 +4736,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (EnableMemAccessVersioning) collectStridedAccess(LI); - // FP instructions can allow unsafe algebra, thus vectorizable by - // non-IEEE-754 compliant SIMD units. - // This applies to floating-point math operations and calls, not memory - // operations, shuffles, or casts, as they don't change precision or - // semantics. + // FP instructions can allow unsafe algebra, thus vectorizable by + // non-IEEE-754 compliant SIMD units. + // This applies to floating-point math operations and calls, not memory + // operations, shuffles, or casts, as they don't change precision or + // semantics. } else if (it->getType()->isFloatingPointTy() && - (CI || it->isBinaryOp()) && - !it->hasUnsafeAlgebra()) { + (CI || it->isBinaryOp()) && !it->hasUnsafeAlgebra()) { DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n"); Hints->setPotentiallyUnsafe(); } @@ -4783,13 +4750,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Reduction instructions are allowed to have exit users. // All other instructions must not have external users. if (hasOutsideLoopUser(TheLoop, &*it, AllowedExit)) { - emitAnalysis(VectorizationReport(&*it) << - "value cannot be used outside the loop"); + emitAnalysis(VectorizationReport(&*it) + << "value cannot be used outside the loop"); return false; } } // next instr. - } if (!Induction) { @@ -4832,7 +4798,7 @@ void LoopVectorizationLegality::collectStridedAccess(Value *MemAccess) { void LoopVectorizationLegality::collectLoopUniforms() { // We now know that the loop is vectorizable! // Collect variables that will remain uniform after vectorization. - std::vector Worklist; + std::vector Worklist; BasicBlock *Latch = TheLoop->getLoopLatch(); // Start with the conditional branch and walk up the block. @@ -4842,9 +4808,9 @@ void LoopVectorizationLegality::collectLoopUniforms() { // after vectorization (and subsequent cleanup) and, until revectorization is // supported, all dependencies must also be uniform. for (Loop::block_iterator B = TheLoop->block_begin(), - BE = TheLoop->block_end(); B != BE; ++B) - for (BasicBlock::iterator I = (*B)->begin(), IE = (*B)->end(); - I != IE; ++I) + BE = TheLoop->block_end(); + B != BE; ++B) + for (BasicBlock::iterator I = (*B)->begin(), IE = (*B)->end(); I != IE; ++I) if (I->getType()->isPointerTy() && isConsecutivePtr(&*I)) Worklist.insert(Worklist.end(), I->op_begin(), I->op_end()); @@ -4889,7 +4855,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() { } bool LoopVectorizationLegality::isInductionVariable(const Value *V) { - Value *In0 = const_cast(V); + Value *In0 = const_cast(V); PHINode *PN = dyn_cast_or_null(In0); if (!PN) return false; @@ -4901,12 +4867,12 @@ bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) { return FirstOrderRecurrences.count(Phi); } -bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { +bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT); } -bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB, - SmallPtrSetImpl &SafePtrs) { +bool LoopVectorizationLegality::blockCanBePredicated( + BasicBlock *BB, SmallPtrSetImpl &SafePtrs) { const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { @@ -4963,7 +4929,8 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB, // The instructions below can trap. switch (it->getOpcode()) { - default: continue; + default: + continue; case Instruction::UDiv: case Instruction::SDiv: case Instruction::URem: @@ -5157,20 +5124,22 @@ void InterleavedAccessInfo::analyzeInterleaving( LoopVectorizationCostModel::VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { // Width 1 means no vectorize - VectorizationFactor Factor = { 1U, 0U }; + VectorizationFactor Factor = {1U, 0U}; if (OptForSize && Legal->getRuntimePointerChecking()->Need) { - emitAnalysis(VectorizationReport() << - "runtime pointer checks needed. Enable vectorization of this " - "loop with '#pragma clang loop vectorize(enable)' when " - "compiling with -Os/-Oz"); - DEBUG(dbgs() << - "LV: Aborting. Runtime ptr check is required with -Os/-Oz.\n"); + emitAnalysis( + VectorizationReport() + << "runtime pointer checks needed. Enable vectorization of this " + "loop with '#pragma clang loop vectorize(enable)' when " + "compiling with -Os/-Oz"); + DEBUG(dbgs() + << "LV: Aborting. Runtime ptr check is required with -Os/-Oz.\n"); return Factor; } if (!EnableCondStoresVectorization && Legal->getNumPredStores()) { - emitAnalysis(VectorizationReport() << - "store that is conditionally executed prevents vectorization"); + emitAnalysis( + VectorizationReport() + << "store that is conditionally executed prevents vectorization"); DEBUG(dbgs() << "LV: No vectorization. There are conditional stores.\n"); return Factor; } @@ -5186,14 +5155,14 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { unsigned MaxSafeDepDist = -1U; if (Legal->getMaxSafeDepDistBytes() != -1U) MaxSafeDepDist = Legal->getMaxSafeDepDistBytes() * 8; - WidestRegister = ((WidestRegister < MaxSafeDepDist) ? - WidestRegister : MaxSafeDepDist); + WidestRegister = + ((WidestRegister < MaxSafeDepDist) ? WidestRegister : MaxSafeDepDist); unsigned MaxVectorSize = WidestRegister / WidestType; DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType << " / " << WidestType << " bits.\n"); - DEBUG(dbgs() << "LV: The Widest register is: " - << WidestRegister << " bits.\n"); + DEBUG(dbgs() << "LV: The Widest register is: " << WidestRegister + << " bits.\n"); if (MaxVectorSize == 0) { DEBUG(dbgs() << "LV: The target has no vector registers.\n"); @@ -5201,7 +5170,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { } assert(MaxVectorSize <= 64 && "Did not expect to pack so many elements" - " into one vector!"); + " into one vector!"); unsigned VF = MaxVectorSize; if (MaximizeBandwidth && !OptForSize) { @@ -5229,9 +5198,9 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { if (OptForSize) { // If we are unable to calculate the trip count then don't try to vectorize. if (TC < 2) { - emitAnalysis - (VectorizationReport() << - "unable to calculate the loop count due to complex control flow"); + emitAnalysis( + VectorizationReport() + << "unable to calculate the loop count due to complex control flow"); DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n"); return Factor; } @@ -5244,11 +5213,11 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { else { // If the trip count that we found modulo the vectorization factor is not // zero then we require a tail. - emitAnalysis(VectorizationReport() << - "cannot optimize for size and vectorize at the " - "same time. Enable vectorization of this loop " - "with '#pragma clang loop vectorize(enable)' " - "when compiling with -Os/-Oz"); + emitAnalysis(VectorizationReport() + << "cannot optimize for size and vectorize at the " + "same time. Enable vectorization of this loop " + "with '#pragma clang loop vectorize(enable)' " + "when compiling with -Os/-Oz"); DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n"); return Factor; } @@ -5277,17 +5246,18 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { Cost = expectedCost(Width).first / (float)Width; } - for (unsigned i=2; i <= VF; i*=2) { + for (unsigned i = 2; i <= VF; i *= 2) { // Notice that the vector loop needs to be executed less times, so // we need to divide the cost of the vector loops by the width of // the vector elements. VectorizationCostTy C = expectedCost(i); float VectorCost = C.first / (float)i; - DEBUG(dbgs() << "LV: Vector loop of width " << i << " costs: " << - (int)VectorCost << ".\n"); + DEBUG(dbgs() << "LV: Vector loop of width " << i + << " costs: " << (int)VectorCost << ".\n"); if (!C.second && !ForceVectorization) { - DEBUG(dbgs() << "LV: Not considering vector loop of width " << i << - " because it will not generate any vector instructions.\n"); + DEBUG( + dbgs() << "LV: Not considering vector loop of width " << i + << " because it will not generate any vector instructions.\n"); continue; } if (VectorCost < Cost) { @@ -5299,7 +5269,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { DEBUG(if (ForceVectorization && Width > 1 && Cost >= ScalarCost) dbgs() << "LV: Vectorization seems to be not beneficial, " << "but was forced by a user.\n"); - DEBUG(dbgs() << "LV: Selecting VF: "<< Width << ".\n"); + DEBUG(dbgs() << "LV: Selecting VF: " << Width << ".\n"); Factor.Width = Width; Factor.Cost = Width * Cost; return Factor; @@ -5313,7 +5283,8 @@ LoopVectorizationCostModel::getSmallestAndWidestTypes() { // For each block. for (Loop::block_iterator bb = TheLoop->block_begin(), - be = TheLoop->block_end(); bb != be; ++bb) { + be = TheLoop->block_end(); + bb != be; ++bb) { BasicBlock *BB = *bb; // For each instruction in the loop. @@ -5389,8 +5360,8 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, return 1; unsigned TargetNumRegisters = TTI.getNumberOfRegisters(VF > 1); - DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters << - " registers\n"); + DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters + << " registers\n"); if (VF == 1) { if (ForceTargetNumScalarRegs.getNumOccurrences() > 0) @@ -5479,8 +5450,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, // by this point), we can increase the critical path length if the loop // we're interleaving is inside another loop. Limit, by default to 2, so the // critical path only gets increased by one reduction operation. - if (Legal->getReductionVars()->size() && - TheLoop->getLoopDepth() > 1) { + if (Legal->getReductionVars()->size() && TheLoop->getLoopDepth() > 1) { unsigned F = static_cast(MaxNestedScalarReductionIC); SmallIC = std::min(SmallIC, F); StoresIC = std::min(StoresIC, F); @@ -5537,20 +5507,20 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef VFs) { // Each 'key' in the map opens a new interval. The values // of the map are the index of the 'last seen' usage of the // instruction that is the key. - typedef DenseMap IntervalMap; + typedef DenseMap IntervalMap; // Maps instruction to its index. - DenseMap IdxToInstr; + DenseMap IdxToInstr; // Marks the end of each interval. IntervalMap EndPoint; // Saves the list of instruction indices that are used in the loop. - SmallSet Ends; + SmallSet Ends; // Saves the list of values that are used in the loop but are // defined outside the loop, such as arguments and constants. - SmallPtrSet LoopInvariants; + SmallPtrSet LoopInvariants; unsigned Index = 0; - for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(), - be = DFS.endRPO(); bb != be; ++bb) { + for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(), be = DFS.endRPO(); + bb != be; ++bb) { RU.NumInstructions += (*bb)->size(); for (Instruction &I : **bb) { IdxToInstr[Index++] = &I; @@ -5561,7 +5531,8 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef VFs) { Instruction *Instr = dyn_cast(U); // Ignore non-instruction values such as arguments, constants, etc. - if (!Instr) continue; + if (!Instr) + continue; // If this instruction is outside the loop then record it and continue. if (!TheLoop->contains(Instr)) { @@ -5577,15 +5548,15 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef VFs) { } // Saves the list of intervals that end with the index in 'key'. - typedef SmallVector InstrList; + typedef SmallVector InstrList; DenseMap TransposeEnds; // Transpose the EndPoints to a list of values that end at each index. - for (IntervalMap::iterator it = EndPoint.begin(), e = EndPoint.end(); - it != e; ++it) + for (IntervalMap::iterator it = EndPoint.begin(), e = EndPoint.end(); it != e; + ++it) TransposeEnds[it->second].push_back(it->first); - SmallSet OpenIntervals; + SmallSet OpenIntervals; // Get the size of the widest register. unsigned MaxSafeDepDist = -1U; @@ -5611,7 +5582,8 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef VFs) { for (unsigned int i = 0; i < Index; ++i) { Instruction *I = IdxToInstr[i]; // Ignore instructions that are never used within the loop. - if (!Ends.count(I)) continue; + if (!Ends.count(I)) + continue; // Skip ignored values. if (ValuesToIgnore.count(I)) @@ -5652,7 +5624,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef VFs) { Invariant += GetRegUsage(Inst->getType(), VFs[i]); } - DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n'); + DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n'); DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsages[i] << '\n'); DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << '\n'); DEBUG(dbgs() << "LV(REG): LoopSize: " << RU.NumInstructions << '\n'); @@ -5671,7 +5643,8 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) { // For each block. for (Loop::block_iterator bb = TheLoop->block_begin(), - be = TheLoop->block_end(); bb != be; ++bb) { + be = TheLoop->block_end(); + bb != be; ++bb) { VectorizationCostTy BlockCost; BasicBlock *BB = *bb; @@ -5693,8 +5666,8 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) { BlockCost.first += C.first; BlockCost.second |= C.second; - DEBUG(dbgs() << "LV: Found an estimated cost of " << C.first << - " for VF " << VF << " For instruction: " << *it << '\n'); + DEBUG(dbgs() << "LV: Found an estimated cost of " << C.first << " for VF " + << VF << " For instruction: " << *it << '\n'); } // We assume that if-converted blocks have a 50% chance of being executed. @@ -5720,7 +5693,7 @@ static bool isGatherOrScatterLegal(Instruction *I, Value *Ptr, LoopVectorizationLegality *Legal) { Type *DataTy = cast(Ptr->getType())->getElementType(); return (isa(I) && Legal->isLegalMaskedGather(DataTy)) || - (isa(I) && Legal->isLegalMaskedScatter(DataTy)); + (isa(I) && Legal->isLegalMaskedScatter(DataTy)); } /// \brief Check whether the address computation for a non-consecutive memory @@ -5791,14 +5764,14 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { Type *VectorTy; unsigned C = getInstructionCost(I, VF, VectorTy); - bool TypeNotScalarized = VF > 1 && !VectorTy->isVoidTy() && - TTI.getNumberOfParts(VectorTy) < VF; + bool TypeNotScalarized = + VF > 1 && !VectorTy->isVoidTy() && TTI.getNumberOfParts(VectorTy) < VF; return VectorizationCostTy(C, TypeNotScalarized); } -unsigned -LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF, - Type *&VectorTy) { +unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, + unsigned VF, + Type *&VectorTy) { Type *RetTy = I->getType(); if (VF > 1 && MinBWs.count(I)) RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]); @@ -5850,9 +5823,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF, // Certain instructions can be cheaper to vectorize if they have a constant // second vector operand. One example of this are shifts on x86. TargetTransformInfo::OperandValueKind Op1VK = - TargetTransformInfo::OK_AnyValue; + TargetTransformInfo::OK_AnyValue; TargetTransformInfo::OperandValueKind Op2VK = - TargetTransformInfo::OK_AnyValue; + TargetTransformInfo::OK_AnyValue; TargetTransformInfo::OperandValueProperties Op1VP = TargetTransformInfo::OP_None; TargetTransformInfo::OperandValueProperties Op2VP = @@ -5903,28 +5876,27 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF, case Instruction::Load: { StoreInst *SI = dyn_cast(I); LoadInst *LI = dyn_cast(I); - Type *ValTy = (SI ? SI->getValueOperand()->getType() : - LI->getType()); + Type *ValTy = (SI ? SI->getValueOperand()->getType() : LI->getType()); VectorTy = ToVectorTy(ValTy, VF); unsigned Alignment = SI ? SI->getAlignment() : LI->getAlignment(); - unsigned AS = SI ? SI->getPointerAddressSpace() : - LI->getPointerAddressSpace(); + unsigned AS = + SI ? SI->getPointerAddressSpace() : LI->getPointerAddressSpace(); Value *Ptr = SI ? SI->getPointerOperand() : LI->getPointerOperand(); // We add the cost of address computation here instead of with the gep // instruction because only here we know whether the operation is // scalarized. if (VF == 1) return TTI.getAddressComputationCost(VectorTy) + - TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS); + TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS); if (LI && Legal->isUniform(Ptr)) { // Scalar load + broadcast unsigned Cost = TTI.getAddressComputationCost(ValTy->getScalarType()); Cost += TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), Alignment, AS); - return Cost + TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, - ValTy); + return Cost + + TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, ValTy); } // For an interleaved access, calculate the total cost of the whole @@ -5969,8 +5941,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF, // Scalarized loads/stores. int ConsecutiveStride = Legal->isConsecutivePtr(Ptr); - bool UseGatherOrScatter = (ConsecutiveStride == 0) && - isGatherOrScatterLegal(I, Ptr, Legal); + bool UseGatherOrScatter = + (ConsecutiveStride == 0) && isGatherOrScatterLegal(I, Ptr, Legal); bool Reverse = ConsecutiveStride < 0; const DataLayout &DL = I->getModule()->getDataLayout(); @@ -5979,7 +5951,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF, if ((!ConsecutiveStride && !UseGatherOrScatter) || ScalarAllocatedSize != VectorElementSize) { bool IsComplexComputation = - isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop); + isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop); unsigned Cost = 0; // The cost of extracting from the value vector and pointer vector. Type *PtrTy = ToVectorTy(Ptr->getType(), VF); @@ -5989,15 +5961,16 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF, // In case of STORE, the cost of ExtractElement from the vector. // In case of LOAD, the cost of InsertElement into the returned // vector. - Cost += TTI.getVectorInstrCost(SI ? Instruction::ExtractElement : - Instruction::InsertElement, - VectorTy, i); + Cost += TTI.getVectorInstrCost(SI ? Instruction::ExtractElement + : Instruction::InsertElement, + VectorTy, i); } // The cost of the scalar loads/stores. Cost += VF * TTI.getAddressComputationCost(PtrTy, IsComplexComputation); - Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), - Alignment, AS); + Cost += VF * + TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), + Alignment, AS); return Cost; } @@ -6006,19 +5979,18 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF, assert(ConsecutiveStride == 0 && "Gather/Scatter are not used for consecutive stride"); return Cost + - TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr, - Legal->isMaskRequired(I), Alignment); + TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr, + Legal->isMaskRequired(I), Alignment); } // Wide load/stores. if (Legal->isMaskRequired(I)) - Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, - AS); + Cost += + TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS); else Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS); if (Reverse) - Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, - VectorTy, 0); + Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0); return Cost; } case Instruction::ZExt: @@ -6051,13 +6023,13 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF, Type *MinVecTy = VectorTy; if (I->getOpcode() == Instruction::Trunc) { SrcVecTy = smallestIntegerVectorType(SrcVecTy, MinVecTy); - VectorTy = largestIntegerVectorType(ToVectorTy(I->getType(), VF), - MinVecTy); + VectorTy = + largestIntegerVectorType(ToVectorTy(I->getType(), VF), MinVecTy); } else if (I->getOpcode() == Instruction::ZExt || I->getOpcode() == Instruction::SExt) { SrcVecTy = largestIntegerVectorType(SrcVecTy, MinVecTy); - VectorTy = smallestIntegerVectorType(ToVectorTy(I->getType(), VF), - MinVecTy); + VectorTy = + smallestIntegerVectorType(ToVectorTy(I->getType(), VF), MinVecTy); } } @@ -6078,10 +6050,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF, unsigned Cost = 0; if (!RetTy->isVoidTy() && VF != 1) { - unsigned InsCost = TTI.getVectorInstrCost(Instruction::InsertElement, - VectorTy); - unsigned ExtCost = TTI.getVectorInstrCost(Instruction::ExtractElement, - VectorTy); + unsigned InsCost = + TTI.getVectorInstrCost(Instruction::InsertElement, VectorTy); + unsigned ExtCost = + TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy); // The cost of inserting the results plus extracting each one of the // operands. @@ -6093,7 +6065,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF, Cost += VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy); return Cost; } - }// end of switch. + } // end of switch. } char LoopVectorize::ID = 0; @@ -6115,9 +6087,9 @@ INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass) INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false) namespace llvm { - Pass *createLoopVectorizePass(bool NoUnrolling, bool AlwaysVectorize) { - return new LoopVectorize(NoUnrolling, AlwaysVectorize); - } +Pass *createLoopVectorizePass(bool NoUnrolling, bool AlwaysVectorize) { + return new LoopVectorize(NoUnrolling, AlwaysVectorize); +} } bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) { @@ -6132,7 +6104,6 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) { return false; } - void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr, bool IfPredicateStore) { assert(!Instr->getType()->isAggregateType() && "Can't handle vectors"); @@ -6174,8 +6145,7 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr, // Does this instruction return a value ? bool IsVoidRetTy = Instr->getType()->isVoidTy(); - Value *UndefVec = IsVoidRetTy ? nullptr : - UndefValue::get(Instr->getType()); + Value *UndefVec = IsVoidRetTy ? nullptr : UndefValue::get(Instr->getType()); // Create a new entry in the WidenMap and initialize it to Undef or Null. VectorParts &VecResults = WidenMap.splat(Instr, UndefVec); @@ -6202,31 +6172,30 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr, } Instruction *Cloned = Instr->clone(); - if (!IsVoidRetTy) - Cloned->setName(Instr->getName() + ".cloned"); - // Replace the operands of the cloned instructions with extracted scalars. - for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) { - Value *Op = Params[op][Part]; - Cloned->setOperand(op, Op); - } + if (!IsVoidRetTy) + Cloned->setName(Instr->getName() + ".cloned"); + // Replace the operands of the cloned instructions with extracted scalars. + for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) { + Value *Op = Params[op][Part]; + Cloned->setOperand(op, Op); + } - // Place the cloned scalar in the new loop. - Builder.Insert(Cloned); + // Place the cloned scalar in the new loop. + Builder.Insert(Cloned); - // If we just cloned a new assumption, add it the assumption cache. - if (auto *II = dyn_cast(Cloned)) - if (II->getIntrinsicID() == Intrinsic::assume) - AC->registerAssumption(II); + // If we just cloned a new assumption, add it the assumption cache. + if (auto *II = dyn_cast(Cloned)) + if (II->getIntrinsicID() == Intrinsic::assume) + AC->registerAssumption(II); - // If the original scalar returns a value we need to place it in a vector - // so that future users will be able to use it. - if (!IsVoidRetTy) - VecResults[Part] = Cloned; + // If the original scalar returns a value we need to place it in a vector + // so that future users will be able to use it. + if (!IsVoidRetTy) + VecResults[Part] = Cloned; - // End if-block. - if (IfPredicateStore) - PredicatedStores.push_back(std::make_pair(cast(Cloned), - Cmp)); + // End if-block. + if (IfPredicateStore) + PredicatedStores.push_back(std::make_pair(cast(Cloned), Cmp)); } } @@ -6237,13 +6206,9 @@ void InnerLoopUnroller::vectorizeMemoryInstruction(Instruction *Instr) { return scalarizeInstruction(Instr, IfPredicateStore); } -Value *InnerLoopUnroller::reverseVector(Value *Vec) { - return Vec; -} +Value *InnerLoopUnroller::reverseVector(Value *Vec) { return Vec; } -Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { - return V; -} +Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; } Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, Value *Step) { // When unrolling and the VF is 1, we only need to add a simple scalar. -- 2.50.1