From 8b3e014d452e902463a28f7950cc7ff7049f8f70 Mon Sep 17 00:00:00 2001
From: Dehao Chen <dehao@google.com>
Date: Thu, 5 May 2016 00:54:54 +0000
Subject: [PATCH] clang-format some files in preparation of coming patch
 reviews.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268583 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Transforms/Utils/LoopUtils.h  |   21 +-
 lib/Transforms/Scalar/LICM.cpp             |  400 ++++----
 lib/Transforms/Scalar/LoopUnrollPass.cpp   |   61 +-
 lib/Transforms/Vectorize/LoopVectorize.cpp | 1009 ++++++++++----------
 4 files changed, 724 insertions(+), 767 deletions(-)
diff --git a/include/llvm/Transforms/Utils/LoopUtils.h b/include/llvm/Transforms/Utils/LoopUtils.h
index ff0d673d16e..b91a313ea02 100644
--- a/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/include/llvm/Transforms/Utils/LoopUtils.h
@@ -37,13 +37,12 @@ class TargetLibraryInfo;
 /// \brief Captures loop safety information.
 /// It keep information for loop & its header may throw exception.
 struct LICMSafetyInfo {
-  bool MayThrow;           // The current loop contains an instruction which
-                           // may throw.
-  bool HeaderMayThrow;     // Same as previous, but specific to loop header
+  bool MayThrow;       // The current loop contains an instruction which
+                       // may throw.
+  bool HeaderMayThrow; // Same as previous, but specific to loop header
   // Used to update funclet bundle operands.
   DenseMap<BasicBlock *, ColorVector> BlockColors;
-  LICMSafetyInfo() : MayThrow(false), HeaderMayThrow(false)
-  {}
+  LICMSafetyInfo() : MayThrow(false), HeaderMayThrow(false) {}
 };
 
 /// The RecurrenceDescriptor is used to identify recurrences variables in a
@@ -268,7 +267,7 @@ public:
 public:
   /// Default constructor - creates an invalid induction.
   InductionDescriptor()
-    : StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {}
+      : StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {}
 
   /// Get the consecutive direction. Returns:
   ///   0 - unknown or non-consecutive.
@@ -325,8 +324,7 @@ bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE,
 /// If ScalarEvolution is passed in, it will be preserved.
 ///
 /// Returns true if any modifications are made to the loop.
-bool formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
-               ScalarEvolution *SE);
+bool formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE);
 
 /// \brief Put a loop nest into LCSSA form.
 ///
@@ -370,8 +368,8 @@ bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
 /// insertion point vector, PredIteratorCache, LoopInfo, DominatorTree, Loop,
 /// AliasSet information for all instructions of the loop and loop safety
 /// information as arguments. It returns changed status.
-bool promoteLoopAccessesToScalars(AliasSet &, SmallVectorImpl<BasicBlock*> &,
-                                  SmallVectorImpl<Instruction*> &,
+bool promoteLoopAccessesToScalars(AliasSet &, SmallVectorImpl<BasicBlock *> &,
+                                  SmallVectorImpl<Instruction *> &,
                                   PredIteratorCache &, LoopInfo *,
                                   DominatorTree *, const TargetLibraryInfo *,
                                   Loop *, AliasSetTracker *, LICMSafetyInfo *);
@@ -394,7 +392,7 @@ Optional<const MDOperand *> findStringMetadataForLoop(Loop *TheLoop,
                                                       StringRef Name);
 
 /// \brief Set input string into loop metadata by keeping other values intact.
-void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, 
+void addStringMetadataToLoop(Loop *TheLoop, const char *MDString,
                              unsigned V = 0);
 
 /// Helper to consistently add the set of standard passes to a loop pass's \c
@@ -403,7 +401,6 @@ void addStringMetadataToLoop(Loop *TheLoop, const char *MDString,
 /// All loop passes should call this as part of implementing their \c
 /// getAnalysisUsage.
 void getLoopAnalysisUsage(AnalysisUsage &AU);
-
 }
 
 #endif
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index e6ad42bb4c3..d2969453dff 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -30,7 +30,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AliasSetTracker.h"
@@ -59,6 +58,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -67,15 +67,15 @@ using namespace llvm;
 
 #define DEBUG_TYPE "licm"
 
-STATISTIC(NumSunk      , "Number of instructions sunk out of loop");
-STATISTIC(NumHoisted   , "Number of instructions hoisted out of loop");
+STATISTIC(NumSunk, "Number of instructions sunk out of loop");
+STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
 STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
 STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
-STATISTIC(NumPromoted  , "Number of memory locations promoted to registers");
+STATISTIC(NumPromoted, "Number of memory locations promoted to registers");
 
 static cl::opt<bool>
-DisablePromotion("disable-licm-promotion", cl::Hidden,
-                 cl::desc("Disable memory promotion in LICM pass"));
+    DisablePromotion("disable-licm-promotion", cl::Hidden,
+                     cl::desc("Disable memory promotion in LICM pass"));
 
 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
 static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
@@ -86,8 +86,7 @@ static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
                  const Loop *CurLoop, AliasSetTracker *CurAST,
                  const LICMSafetyInfo *SafetyInfo);
 static bool isGuaranteedToExecute(const Instruction &Inst,
-                                  const DominatorTree *DT,
-                                  const Loop *CurLoop,
+                                  const DominatorTree *DT, const Loop *CurLoop,
                                   const LICMSafetyInfo *SafetyInfo);
 static bool isSafeToExecuteUnconditionally(const Instruction &Inst,
                                            const DominatorTree *DT,
@@ -96,7 +95,7 @@ static bool isSafeToExecuteUnconditionally(const Instruction &Inst,
                                            const LICMSafetyInfo *SafetyInfo,
                                            const Instruction *CtxI = nullptr);
 static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
-                                     const AAMDNodes &AAInfo, 
+                                     const AAMDNodes &AAInfo,
                                      AliasSetTracker *CurAST);
 static Instruction *
 CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
@@ -108,57 +107,57 @@ static bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA,
                                LICMSafetyInfo *SafetyInfo);
 
 namespace {
-  struct LICM : public LoopPass {
-    static char ID; // Pass identification, replacement for typeid
-    LICM() : LoopPass(ID) {
-      initializeLICMPass(*PassRegistry::getPassRegistry());
-    }
+struct LICM : public LoopPass {
+  static char ID; // Pass identification, replacement for typeid
+  LICM() : LoopPass(ID) {
+    initializeLICMPass(*PassRegistry::getPassRegistry());
+  }
 
-    bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
 
-    /// This transformation requires natural loop information & requires that
-    /// loop preheaders be inserted into the CFG...
-    ///
-    void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.setPreservesCFG();
-      AU.addRequired<TargetLibraryInfoWrapperPass>();
-      getLoopAnalysisUsage(AU);
-    }
+  /// This transformation requires natural loop information & requires that
+  /// loop preheaders be inserted into the CFG...
+  ///
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
+    getLoopAnalysisUsage(AU);
+  }
 
-    using llvm::Pass::doFinalization;
+  using llvm::Pass::doFinalization;
 
-    bool doFinalization() override {
-      assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets");
-      return false;
-    }
+  bool doFinalization() override {
+    assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets");
+    return false;
+  }
 
-  private:
-    AliasAnalysis *AA;       // Current AliasAnalysis information
-    LoopInfo      *LI;       // Current LoopInfo
-    DominatorTree *DT;       // Dominator Tree for the current Loop.
+private:
+  AliasAnalysis *AA; // Current AliasAnalysis information
+  LoopInfo *LI;      // Current LoopInfo
+  DominatorTree *DT; // Dominator Tree for the current Loop.
 
-    TargetLibraryInfo *TLI;  // TargetLibraryInfo for constant folding.
+  TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding.
 
-    // State that is updated as we process loops.
-    bool Changed;            // Set to true when we change anything.
-    BasicBlock *Preheader;   // The preheader block of the current loop...
-    Loop *CurLoop;           // The current loop we are working on...
-    AliasSetTracker *CurAST; // AliasSet information for the current loop...
-    DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap;
+  // State that is updated as we process loops.
+  bool Changed;            // Set to true when we change anything.
+  BasicBlock *Preheader;   // The preheader block of the current loop...
+  Loop *CurLoop;           // The current loop we are working on...
+  AliasSetTracker *CurAST; // AliasSet information for the current loop...
+  DenseMap<Loop *, AliasSetTracker *> LoopToAliasSetMap;
 
-    /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
-    void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
-                                 Loop *L) override;
+  /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
+  void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
+                               Loop *L) override;
 
-    /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
-    /// set.
-    void deleteAnalysisValue(Value *V, Loop *L) override;
+  /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
+  /// set.
+  void deleteAnalysisValue(Value *V, Loop *L) override;
 
-    /// Simple Analysis hook. Delete loop L from alias set map.
-    void deleteAnalysisLoop(Loop *L) override;
+  /// Simple Analysis hook. Delete loop L from alias set map.
+  void deleteAnalysisLoop(Loop *L) override;
 
-    AliasSetTracker *collectAliasInfoForLoop(Loop *L);
-  };
+  AliasSetTracker *collectAliasInfoForLoop(Loop *L);
+};
 }
 
 char LICM::ID = 0;
@@ -225,9 +224,9 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
 
     // Loop over all of the alias sets in the tracker object.
     for (AliasSet &AS : *CurAST)
-      Changed |= promoteLoopAccessesToScalars(AS, ExitBlocks, InsertPts,
-                                              PIC, LI, DT, TLI, CurLoop,
-                                              CurAST, &SafetyInfo);
+      Changed |=
+          promoteLoopAccessesToScalars(AS, ExitBlocks, InsertPts, PIC, LI, DT,
+                                       TLI, CurLoop, CurAST, &SafetyInfo);
 
     // Once we have promoted values across the loop body we have to recursively
     // reform LCSSA as any nested loop may now have values defined within the
@@ -266,7 +265,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
 }
 
 /// Walk the specified region of the CFG (defined by all blocks dominated by
-/// the specified block, and that are in the current loop) in reverse depth 
+/// the specified block, and that are in the current loop) in reverse depth
 /// first order w.r.t the DominatorTree.  This allows us to visit uses before
 /// definitions, allowing us to sink a loop body in one pass without iteration.
 ///
@@ -275,25 +274,27 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
                       AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) {
 
   // Verify inputs.
-  assert(N != nullptr && AA != nullptr && LI != nullptr && 
-         DT != nullptr && CurLoop != nullptr && CurAST != nullptr && 
-         SafetyInfo != nullptr && "Unexpected input to sinkRegion");
+  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
+         CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&
+         "Unexpected input to sinkRegion");
 
   BasicBlock *BB = N->getBlock();
   // If this subregion is not in the top level loop at all, exit.
-  if (!CurLoop->contains(BB)) return false;
+  if (!CurLoop->contains(BB))
+    return false;
 
   // We are processing blocks in reverse dfo, so process children first.
   bool Changed = false;
-  const std::vector<DomTreeNode*> &Children = N->getChildren();
+  const std::vector<DomTreeNode *> &Children = N->getChildren();
   for (DomTreeNode *Child : Children)
     Changed |= sinkRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
 
   // Only need to process the contents of this block if it is not part of a
   // subloop (which would already have been processed).
-  if (inSubLoop(BB,CurLoop,LI)) return Changed;
+  if (inSubLoop(BB, CurLoop, LI))
+    return Changed;
 
-  for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) {
+  for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
     Instruction &I = *--II;
 
     // If the instruction is dead, we would try to sink it because it isn't used
@@ -330,20 +331,21 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
                        DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
                        AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) {
   // Verify inputs.
-  assert(N != nullptr && AA != nullptr && LI != nullptr && 
-         DT != nullptr && CurLoop != nullptr && CurAST != nullptr && 
-         SafetyInfo != nullptr && "Unexpected input to hoistRegion");
+  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
+         CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&
+         "Unexpected input to hoistRegion");
 
   BasicBlock *BB = N->getBlock();
 
   // If this subregion is not in the top level loop at all, exit.
-  if (!CurLoop->contains(BB)) return false;
+  if (!CurLoop->contains(BB))
+    return false;
 
   // Only need to process the contents of this block if it is not part of a
   // subloop (which would already have been processed).
   bool Changed = false;
   if (!inSubLoop(BB, CurLoop, LI))
-    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) {
+    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
       Instruction &I = *II++;
       // Try constant folding this instruction.  If all the operands are
       // constants, it is technically hoistable, but it would be better to just
@@ -364,12 +366,13 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
       //
       if (CurLoop->hasLoopInvariantOperands(&I) &&
           canSinkOrHoistInst(I, AA, DT, TLI, CurLoop, CurAST, SafetyInfo) &&
-          isSafeToExecuteUnconditionally(I, DT, TLI, CurLoop, SafetyInfo,
-                                 CurLoop->getLoopPreheader()->getTerminator()))
+          isSafeToExecuteUnconditionally(
+              I, DT, TLI, CurLoop, SafetyInfo,
+              CurLoop->getLoopPreheader()->getTerminator()))
         Changed |= hoist(I, DT, CurLoop, SafetyInfo);
     }
 
-  const std::vector<DomTreeNode*> &Children = N->getChildren();
+  const std::vector<DomTreeNode *> &Children = N->getChildren();
   for (DomTreeNode *Child : Children)
     Changed |= hoistRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
   return Changed;
@@ -378,7 +381,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
 /// Computes loop safety information, checks loop body & header
 /// for the possibility of may throw exception.
 ///
-void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) {
+void llvm::computeLICMSafetyInfo(LICMSafetyInfo *SafetyInfo, Loop *CurLoop) {
   assert(CurLoop != nullptr && "CurLoop cant be null");
   BasicBlock *Header = CurLoop->getHeader();
   // Setting default safety values.
@@ -388,11 +391,12 @@ void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) {
   for (BasicBlock::iterator I = Header->begin(), E = Header->end();
        (I != E) && !SafetyInfo->HeaderMayThrow; ++I)
     SafetyInfo->HeaderMayThrow |= I->mayThrow();
-  
+
   SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow;
-  // Iterate over loop instructions and compute safety info. 
-  for (Loop::block_iterator BB = CurLoop->block_begin(), 
-       BBE = CurLoop->block_end(); (BB != BBE) && !SafetyInfo->MayThrow ; ++BB)
+  // Iterate over loop instructions and compute safety info.
+  for (Loop::block_iterator BB = CurLoop->block_begin(),
+                            BBE = CurLoop->block_end();
+       (BB != BBE) && !SafetyInfo->MayThrow; ++BB)
     for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end();
          (I != E) && !SafetyInfo->MayThrow; ++I)
       SafetyInfo->MayThrow |= I->mayThrow();
@@ -415,7 +419,7 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
   // Loads have extra constraints we have to verify before we can hoist them.
   if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
     if (!LI->isUnordered())
-      return false;        // Don't hoist volatile/atomic loads!
+      return false; // Don't hoist volatile/atomic loads!
 
     // Loads from constant memory are always safe to move, even if they end up
     // in the same alias set as something that ends up being modified.
@@ -467,7 +471,8 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
           break;
         }
       }
-      if (!FoundMod) return true;
+      if (!FoundMod)
+        return true;
     }
 
     // FIXME: This should use mod/ref information to see if we can hoist or
@@ -486,7 +491,7 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
 
   // TODO: Plumb the context instruction through to make hoisting and sinking
   // more powerful. Hoisting of loads already works due to the special casing
-  // above. 
+  // above.
   return isSafeToExecuteUnconditionally(I, DT, TLI, CurLoop, SafetyInfo,
                                         nullptr);
 }
@@ -589,7 +594,8 @@ CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
   }
 
   ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
-  if (!I.getName().empty()) New->setName(I.getName() + ".le");
+  if (!I.getName().empty())
+    New->setName(I.getName() + ".le");
 
   // Build LCSSA PHI nodes for any in-loop operands. Note that this is
   // particularly cheap because we can rip off the PHI node that we're
@@ -623,15 +629,17 @@ static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
                  const LICMSafetyInfo *SafetyInfo) {
   DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
   bool Changed = false;
-  if (isa<LoadInst>(I)) ++NumMovedLoads;
-  else if (isa<CallInst>(I)) ++NumMovedCalls;
+  if (isa<LoadInst>(I))
+    ++NumMovedLoads;
+  else if (isa<CallInst>(I))
+    ++NumMovedCalls;
   ++NumSunk;
   Changed = true;
 
 #ifndef NDEBUG
   SmallVector<BasicBlock *, 32> ExitBlocks;
   CurLoop->getUniqueExitBlocks(ExitBlocks);
-  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), 
+  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
                                              ExitBlocks.end());
 #endif
 
@@ -688,8 +696,8 @@ static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
 static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
                   const LICMSafetyInfo *SafetyInfo) {
   auto *Preheader = CurLoop->getLoopPreheader();
-  DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": "
-        << I << "\n");
+  DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I
+               << "\n");
 
   // Metadata can be dependent on conditions we are hoisting above.
   // Conservatively strip all metadata on the instruction unless we were
@@ -705,8 +713,10 @@ static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
   // Move the new node to the Preheader, before its terminator.
   I.moveBefore(Preheader->getTerminator());
 
-  if (isa<LoadInst>(I)) ++NumMovedLoads;
-  else if (isa<CallInst>(I)) ++NumMovedCalls;
+  if (isa<LoadInst>(I))
+    ++NumMovedLoads;
+  else if (isa<CallInst>(I))
+    ++NumMovedCalls;
   ++NumHoisted;
   return true;
 }
@@ -714,7 +724,7 @@ static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
 /// Only sink or hoist an instruction if it is not a trapping instruction,
 /// or if the instruction is known not to trap when moved to the preheader.
 /// or if it is a trapping instruction and is guaranteed to execute.
-static bool isSafeToExecuteUnconditionally(const Instruction &Inst, 
+static bool isSafeToExecuteUnconditionally(const Instruction &Inst,
                                            const DominatorTree *DT,
                                            const TargetLibraryInfo *TLI,
                                            const Loop *CurLoop,
@@ -727,9 +737,8 @@ static bool isSafeToExecuteUnconditionally(const Instruction &Inst,
 }
 
 static bool isGuaranteedToExecute(const Instruction &Inst,
-                                  const DominatorTree *DT,
-                                  const Loop *CurLoop,
-                                  const LICMSafetyInfo * SafetyInfo) {
+                                  const DominatorTree *DT, const Loop *CurLoop,
+                                  const LICMSafetyInfo *SafetyInfo) {
 
   // We have to check to make sure that the instruction dominates all
   // of the exit blocks.  If it doesn't, then there is a path out of the loop
@@ -749,7 +758,7 @@ static bool isGuaranteedToExecute(const Instruction &Inst,
     return false;
 
   // Get the exit blocks for the current loop.
-  SmallVector<BasicBlock*, 8> ExitBlocks;
+  SmallVector<BasicBlock *, 8> ExitBlocks;
   CurLoop->getExitBlocks(ExitBlocks);
 
   // Verify that the block dominates each of the exit blocks of the loop.
@@ -766,82 +775,79 @@ static bool isGuaranteedToExecute(const Instruction &Inst,
 }
 
 namespace {
-  class LoopPromoter : public LoadAndStorePromoter {
-    Value *SomePtr;  // Designated pointer to store to.
-    SmallPtrSetImpl<Value*> &PointerMustAliases;
-    SmallVectorImpl<BasicBlock*> &LoopExitBlocks;
-    SmallVectorImpl<Instruction*> &LoopInsertPts;
-    PredIteratorCache &PredCache;
-    AliasSetTracker &AST;
-    LoopInfo &LI;
-    DebugLoc DL;
-    int Alignment;
-    AAMDNodes AATags;
-
-    Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
-      if (Instruction *I = dyn_cast<Instruction>(V))
-        if (Loop *L = LI.getLoopFor(I->getParent()))
-          if (!L->contains(BB)) {
-            // We need to create an LCSSA PHI node for the incoming value and
-            // store that.
-            PHINode *PN =
-                PHINode::Create(I->getType(), PredCache.size(BB),
-                                I->getName() + ".lcssa", &BB->front());
-            for (BasicBlock *Pred : PredCache.get(BB))
-              PN->addIncoming(I, Pred);
-            return PN;
-          }
-      return V;
-    }
+class LoopPromoter : public LoadAndStorePromoter {
+  Value *SomePtr; // Designated pointer to store to.
+  SmallPtrSetImpl<Value *> &PointerMustAliases;
+  SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
+  SmallVectorImpl<Instruction *> &LoopInsertPts;
+  PredIteratorCache &PredCache;
+  AliasSetTracker &AST;
+  LoopInfo &LI;
+  DebugLoc DL;
+  int Alignment;
+  AAMDNodes AATags;
 
-  public:
-    LoopPromoter(Value *SP,
-                 ArrayRef<const Instruction *> Insts,
-                 SSAUpdater &S, SmallPtrSetImpl<Value *> &PMA,
-                 SmallVectorImpl<BasicBlock *> &LEB,
-                 SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
-                 AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
-                 const AAMDNodes &AATags)
-        : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
-          LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
-          LI(li), DL(dl), Alignment(alignment), AATags(AATags) {}
-
-    bool isInstInList(Instruction *I,
-                      const SmallVectorImpl<Instruction*> &) const override {
-      Value *Ptr;
-      if (LoadInst *LI = dyn_cast<LoadInst>(I))
-        Ptr = LI->getOperand(0);
-      else
-        Ptr = cast<StoreInst>(I)->getPointerOperand();
-      return PointerMustAliases.count(Ptr);
-    }
+  Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
+    if (Instruction *I = dyn_cast<Instruction>(V))
+      if (Loop *L = LI.getLoopFor(I->getParent()))
+        if (!L->contains(BB)) {
+          // We need to create an LCSSA PHI node for the incoming value and
+          // store that.
+          PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
+                                        I->getName() + ".lcssa", &BB->front());
+          for (BasicBlock *Pred : PredCache.get(BB))
+            PN->addIncoming(I, Pred);
+          return PN;
+        }
+    return V;
+  }
 
-    void doExtraRewritesBeforeFinalDeletion() const override {
-      // Insert stores after in the loop exit blocks.  Each exit block gets a
-      // store of the live-out values that feed them.  Since we've already told
-      // the SSA updater about the defs in the loop and the preheader
-      // definition, it is all set and we can start using it.
-      for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
-        BasicBlock *ExitBlock = LoopExitBlocks[i];
-        Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
-        LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
-        Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
-        Instruction *InsertPos = LoopInsertPts[i];
-        StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
-        NewSI->setAlignment(Alignment);
-        NewSI->setDebugLoc(DL);
-        if (AATags) NewSI->setAAMetadata(AATags);
-      }
-    }
+public:
+  LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
+               SmallPtrSetImpl<Value *> &PMA,
+               SmallVectorImpl<BasicBlock *> &LEB,
+               SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
+               AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
+               const AAMDNodes &AATags)
+      : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
+        LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
+        LI(li), DL(dl), Alignment(alignment), AATags(AATags) {}
+
+  bool isInstInList(Instruction *I,
+                    const SmallVectorImpl<Instruction *> &) const override {
+    Value *Ptr;
+    if (LoadInst *LI = dyn_cast<LoadInst>(I))
+      Ptr = LI->getOperand(0);
+    else
+      Ptr = cast<StoreInst>(I)->getPointerOperand();
+    return PointerMustAliases.count(Ptr);
+  }
 
-    void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
-      // Update alias analysis.
-      AST.copyValue(LI, V);
+  void doExtraRewritesBeforeFinalDeletion() const override {
+    // Insert stores after in the loop exit blocks.  Each exit block gets a
+    // store of the live-out values that feed them.  Since we've already told
+    // the SSA updater about the defs in the loop and the preheader
+    // definition, it is all set and we can start using it.
+    for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
+      BasicBlock *ExitBlock = LoopExitBlocks[i];
+      Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
+      LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
+      Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
+      Instruction *InsertPos = LoopInsertPts[i];
+      StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
+      NewSI->setAlignment(Alignment);
+      NewSI->setDebugLoc(DL);
+      if (AATags)
+        NewSI->setAAMetadata(AATags);
     }
-    void instructionDeleted(Instruction *I) const override {
-      AST.deleteValue(I);
-    }
-  };
+  }
+
+  void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
+    // Update alias analysis.
+    AST.copyValue(LI, V);
+  }
+  void instructionDeleted(Instruction *I) const override { AST.deleteValue(I); }
+};
 } // end anon namespace
 
 /// Try to promote memory values to scalars by sinking stores out of the
@@ -849,19 +855,14 @@ namespace {
 /// the stores in the loop, looking for stores to Must pointers which are
 /// loop invariant.
 ///
-bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
-                                        SmallVectorImpl<BasicBlock*>&ExitBlocks,
-                                        SmallVectorImpl<Instruction*>&InsertPts,
-                                        PredIteratorCache &PIC, LoopInfo *LI, 
-                                        DominatorTree *DT,
-                                        const TargetLibraryInfo *TLI,
-                                        Loop *CurLoop,
-                                        AliasSetTracker *CurAST, 
-                                        LICMSafetyInfo * SafetyInfo) { 
+bool llvm::promoteLoopAccessesToScalars(
+    AliasSet &AS, SmallVectorImpl<BasicBlock *> &ExitBlocks,
+    SmallVectorImpl<Instruction *> &InsertPts, PredIteratorCache &PIC,
+    LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
+    Loop *CurLoop, AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) {
   // Verify inputs.
-  assert(LI != nullptr && DT != nullptr && 
-         CurLoop != nullptr && CurAST != nullptr && 
-         SafetyInfo != nullptr && 
+  assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
+         CurAST != nullptr && SafetyInfo != nullptr &&
          "Unexpected Input to promoteLoopAccessesToScalars");
 
   // We can promote this alias set if it has a store, if it is a "Must" alias
@@ -875,7 +876,7 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
          "Must alias set should have at least one pointer element in it!");
 
   Value *SomePtr = AS.begin()->getValue();
-  BasicBlock * Preheader = CurLoop->getLoopPreheader();
+  BasicBlock *Preheader = CurLoop->getLoopPreheader();
 
   // It isn't safe to promote a load/store from the loop if the load/store is
   // conditional.  For example, turning:
@@ -907,8 +908,8 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
   // since they're all must alias.
   bool CanSpeculateLoad = false;
 
-  SmallVector<Instruction*, 64> LoopUses;
-  SmallPtrSet<Value*, 4> PointerMustAliases;
+  SmallVector<Instruction *, 64> LoopUses;
+  SmallPtrSet<Value *, 4> PointerMustAliases;
 
   // We start with an alignment of one and try to find instructions that allow
   // us to prove better alignment.
@@ -923,7 +924,7 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
   // is available.
   if (!HasDedicatedExits || !Preheader)
     return false;
-  
+
   const DataLayout &MDL = Preheader->getModule()->getDataLayout();
 
   // Check that all of the pointers in the alias set have the same type.  We
@@ -954,10 +955,8 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
           return Changed;
 
         if (!GuaranteedToExecute && !CanSpeculateLoad)
-          CanSpeculateLoad =
-            isSafeToExecuteUnconditionally(*Load, DT, TLI, CurLoop,
-                                           SafetyInfo,
-                                           Preheader->getTerminator());
+          CanSpeculateLoad = isSafeToExecuteUnconditionally(
+              *Load, DT, TLI, CurLoop, SafetyInfo, Preheader->getTerminator());
       } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
         // Stores *of* the pointer are not interesting, only stores *to* the
         // pointer.
@@ -983,16 +982,13 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
           }
 
         if (!GuaranteedToExecute)
-          GuaranteedToExecute = isGuaranteedToExecute(*UI, DT, 
-                                                      CurLoop, SafetyInfo);
-
+          GuaranteedToExecute =
+              isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo);
 
         if (!GuaranteedToExecute && !CanSpeculateLoad) {
-          CanSpeculateLoad =
-            isDereferenceableAndAlignedPointer(Store->getPointerOperand(),
-                                               Store->getAlignment(), MDL,
-                                               Preheader->getTerminator(),
-                                               DT, TLI);
+          CanSpeculateLoad = isDereferenceableAndAlignedPointer(
+              Store->getPointerOperand(), Store->getAlignment(), MDL,
+              Preheader->getTerminator(), DT, TLI);
         }
       } else
         return Changed; // Not a load or store.
@@ -1014,10 +1010,10 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
   if (!PromotionIsLegal && CanSpeculateLoad) {
     // If this is a thread local location, then we can insert stores along
     // paths which originally didn't have them without violating the memory
-    // model. 
+    // model.
     Value *Object = GetUnderlyingObject(SomePtr, MDL);
-    PromotionIsLegal = isAllocLikeFn(Object, TLI) &&
-      !PointerMayBeCaptured(Object, true, true);
+    PromotionIsLegal =
+        isAllocLikeFn(Object, TLI) && !PointerMayBeCaptured(Object, true, true);
   }
   if (!PromotionIsLegal)
     return Changed;
@@ -1038,7 +1034,8 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
       return Changed;
 
   // Otherwise, this is safe to promote, lets do it!
-  DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
+  DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
+               << '\n');
   Changed = true;
   ++NumPromoted;
 
@@ -1049,20 +1046,19 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
   DebugLoc DL = LoopUses[0]->getDebugLoc();
 
   // We use the SSAUpdater interface to insert phi nodes as required.
-  SmallVector<PHINode*, 16> NewPHIs;
+  SmallVector<PHINode *, 16> NewPHIs;
   SSAUpdater SSA(&NewPHIs);
-  LoopPromoter Promoter(SomePtr, LoopUses, SSA,
-                        PointerMustAliases, ExitBlocks,
+  LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
                         InsertPts, PIC, *CurAST, *LI, DL, Alignment, AATags);
 
   // Set up the preheader to have a definition of the value.  It is the live-out
   // value from the preheader that uses in the loop will use.
-  LoadInst *PreheaderLoad =
-    new LoadInst(SomePtr, SomePtr->getName()+".promoted",
-                 Preheader->getTerminator());
+  LoadInst *PreheaderLoad = new LoadInst(
+      SomePtr, SomePtr->getName() + ".promoted", Preheader->getTerminator());
   PreheaderLoad->setAlignment(Alignment);
   PreheaderLoad->setDebugLoc(DL);
-  if (AATags) PreheaderLoad->setAAMetadata(AATags);
+  if (AATags)
+    PreheaderLoad->setAAMetadata(AATags);
   SSA.AddAvailableValue(Preheader, PreheaderLoad);
 
   // Rewrite all the loads in the loop and remember all the definitions from
@@ -1157,12 +1153,11 @@ void LICM::deleteAnalysisLoop(Loop *L) {
   LoopToAliasSetMap.erase(L);
 }
 
-
 /// Return true if the body of this loop may store into the memory
 /// location pointed to by V.
 ///
 static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
-                                     const AAMDNodes &AAInfo, 
+                                     const AAMDNodes &AAInfo,
                                      AliasSetTracker *CurAST) {
   // Check to see if any of the basic blocks in CurLoop invalidate *V.
   return CurAST->getAliasSetForPointer(V, Size, AAInfo).isMod();
@@ -1175,4 +1170,3 @@ static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
   assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
   return LI->getLoopFor(BB) != CurLoop;
 }
-
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 685ce9e5e8e..2e5c55e73a4 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -12,11 +12,10 @@
 // counts of loops easily.
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/SetVector.h"
-#include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/LoopUnrollAnalyzer.h"
@@ -32,6 +31,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Utils/UnrollLoop.h"
 #include <climits>
@@ -60,34 +60,34 @@ static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze(
     cl::desc("Don't allow loop unrolling to simulate more than this number of"
              "iterations when checking full unroll profitability"));
 
-static cl::opt<unsigned>
-UnrollCount("unroll-count", cl::Hidden,
-  cl::desc("Use this unroll count for all loops including those with "
-           "unroll_count pragma values, for testing purposes"));
+static cl::opt<unsigned> UnrollCount(
+    "unroll-count", cl::Hidden,
+    cl::desc("Use this unroll count for all loops including those with "
+             "unroll_count pragma values, for testing purposes"));
 
-static cl::opt<unsigned>
-UnrollMaxCount("unroll-max-count", cl::Hidden,
-  cl::desc("Set the max unroll count for partial and runtime unrolling, for"
-           "testing purposes"));
+static cl::opt<unsigned> UnrollMaxCount(
+    "unroll-max-count", cl::Hidden,
+    cl::desc("Set the max unroll count for partial and runtime unrolling, for"
+             "testing purposes"));
 
-static cl::opt<unsigned>
-UnrollFullMaxCount("unroll-full-max-count", cl::Hidden,
-  cl::desc("Set the max unroll count for full unrolling, for testing purposes"));
+static cl::opt<unsigned> UnrollFullMaxCount(
+    "unroll-full-max-count", cl::Hidden,
+    cl::desc(
+        "Set the max unroll count for full unrolling, for testing purposes"));
 
 static cl::opt<bool>
-UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
-  cl::desc("Allows loops to be partially unrolled until "
-           "-unroll-threshold loop size is reached."));
+    UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
+                       cl::desc("Allows loops to be partially unrolled until "
+                                "-unroll-threshold loop size is reached."));
 
 static cl::opt<bool>
-UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
-  cl::desc("Unroll loops with run-time trip counts"));
-
-static cl::opt<unsigned>
-PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
-  cl::desc("Unrolled size limit for loops with an unroll(full) or "
-           "unroll_count pragma."));
+    UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
+                  cl::desc("Unroll loops with run-time trip counts"));
 
+static cl::opt<unsigned> PragmaUnrollThreshold(
+    "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
+    cl::desc("Unrolled size limit for loops with an unroll(full) or "
+             "unroll_count pragma."));
 
 /// A magic value for use with the Threshold parameter to indicate
 /// that the loop unroll should be performed regardless of how much
@@ -453,7 +453,8 @@ static unsigned UnrollCountPragmaValue(const Loop *L) {
 // unrolling pass is run more than once (which it generally is).
 static void SetLoopAlreadyUnrolled(Loop *L) {
   MDNode *LoopID = L->getLoopID();
-  if (!LoopID) return;
+  if (!LoopID)
+    return;
 
   // First remove any existing loop unrolling metadata.
   SmallVector<Metadata *, 4> MDs;
@@ -514,9 +515,9 @@ static bool canUnrollCompletely(Loop *L, unsigned Threshold,
       (int64_t)UnrolledCost - (int64_t)DynamicCostSavingsDiscount <=
           (int64_t)Threshold) {
     DEBUG(dbgs() << "  Can fully unroll, because unrolling will reduce the "
-                    "expected dynamic cost by " << PercentDynamicCostSaved
-                 << "% (threshold: " << PercentDynamicCostSavedThreshold
-                 << "%)\n"
+                    "expected dynamic cost by "
+                 << PercentDynamicCostSaved << "% (threshold: "
+                 << PercentDynamicCostSavedThreshold << "%)\n"
                  << "  and the unrolled cost (" << UnrolledCost
                  << ") is less than the max threshold ("
                  << DynamicCostSavingsDiscount << ").\n");
@@ -544,7 +545,7 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
                             Optional<bool> ProvidedRuntime) {
   BasicBlock *Header = L->getHeader();
   DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
-        << "] Loop %" << Header->getName() << "\n");
+               << "] Loop %" << Header->getName() << "\n");
 
   if (HasUnrollDisablePragma(L)) {
     return false;
@@ -592,7 +593,7 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
   // When computing the unrolled size, note that the conditional branch on the
   // backedge and the comparison feeding it are not replicated like the rest of
   // the loop body (which is why 2 is subtracted).
-  uint64_t UnrolledSize = (uint64_t)(LoopSize-2) * Count + 2;
+  uint64_t UnrolledSize = (uint64_t)(LoopSize - 2) * Count + 2;
   if (NotDuplicatable) {
     DEBUG(dbgs() << "  Not unrolling loop which contains non-duplicatable"
                  << " instructions.\n");
@@ -680,7 +681,7 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
     // the original count which satisfies the threshold limit.
     while (Count != 0 && UnrolledSize > UP.PartialThreshold) {
       Count >>= 1;
-      UnrolledSize = (LoopSize-2) * Count + 2;
+      UnrolledSize = (LoopSize - 2) * Count + 2;
     }
 
     if (Count > UP.MaxCount)
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index d1afed0fb58..981324a01e5 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -46,7 +46,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Vectorize.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/MapVector.h"
@@ -57,9 +56,9 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/AliasSetTracker.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/CodeMetrics.h"
 #include "llvm/Analysis/DemandedBits.h"
@@ -73,6 +72,7 @@
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DebugInfo.h"
@@ -98,9 +98,9 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/LoopVersioning.h"
-#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/LoopVersioning.h"
+#include "llvm/Transforms/Vectorize.h"
 #include <algorithm>
 #include <functional>
 #include <map>
@@ -116,16 +116,15 @@ STATISTIC(LoopsVectorized, "Number of loops vectorized");
 STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization");
 
 static cl::opt<bool>
-EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
-                   cl::desc("Enable if-conversion during vectorization."));
+    EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
+                       cl::desc("Enable if-conversion during vectorization."));
 
 /// We don't vectorize loops with a known constant trip count below this number.
-static cl::opt<unsigned>
-TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16),
-                             cl::Hidden,
-                             cl::desc("Don't vectorize loops with a constant "
-                                      "trip count that is smaller than this "
-                                      "value."));
+static cl::opt<unsigned> TinyTripCountVectorThreshold(
+    "vectorizer-min-trip-count", cl::init(16), cl::Hidden,
+    cl::desc("Don't vectorize loops with a constant "
+             "trip count that is smaller than this "
+             "value."));
 
 static cl::opt<bool> MaximizeBandwidth(
     "vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden,
@@ -263,7 +262,7 @@ public:
 /// A helper function for converting Scalar types to vector types.
 /// If the incoming type is void, we return void. If the VF is 1, we return
 /// the scalar type.
-static Type* ToVectorTy(Type *Scalar, unsigned VF) {
+static Type *ToVectorTy(Type *Scalar, unsigned VF) {
   if (Scalar->isVoidTy() || VF == 1)
     return Scalar;
   return VectorType::get(Scalar, VF);
@@ -327,7 +326,7 @@ public:
   // can be validly truncated to. The cost model has assumed this truncation
   // will happen when vectorizing.
   void vectorize(LoopVectorizationLegality *L,
-                 MapVector<Instruction*,uint64_t> MinimumBitWidths) {
+                 MapVector<Instruction *, uint64_t> MinimumBitWidths) {
     MinBWs = MinimumBitWidths;
     Legal = L;
     // Create a new empty loop. Unlink the old loop and connect the new one.
@@ -338,24 +337,22 @@ public:
   }
 
   // Return true if any runtime check is added.
-  bool IsSafetyChecksAdded() {
-    return AddedSafetyChecks;
-  }
+  bool IsSafetyChecksAdded() { return AddedSafetyChecks; }
 
   virtual ~InnerLoopVectorizer() {}
 
 protected:
   /// A small list of PHINodes.
-  typedef SmallVector<PHINode*, 4> PhiVector;
+  typedef SmallVector<PHINode *, 4> PhiVector;
   /// When we unroll loops we have multiple vector values for each scalar.
   /// This data structure holds the unrolled and vectorized values that
   /// originated from one scalar instruction.
-  typedef SmallVector<Value*, 2> VectorParts;
+  typedef SmallVector<Value *, 2> VectorParts;
 
   // When we if-convert we need to create edge masks. We have to cache values
   // so that we don't end up with exponential recursion/IR.
-  typedef DenseMap<std::pair<BasicBlock*, BasicBlock*>,
-                   VectorParts> EdgeMaskCache;
+  typedef DenseMap<std::pair<BasicBlock *, BasicBlock *>, VectorParts>
+      EdgeMaskCache;
 
   /// Create an empty loop, based on the loop ranges of the old loop.
   void createEmptyLoop();
@@ -392,8 +389,8 @@ protected:
   /// Vectorize a single PHINode in a block. This method handles the induction
   /// variable canonicalization. It supports both VF = 1 for unrolled loops and
   /// arbitrary length vectors.
-  void widenPHIInstruction(Instruction *PN, VectorParts &Entry,
-                           unsigned UF, unsigned VF, PhiVector *PV);
+  void widenPHIInstruction(Instruction *PN, VectorParts &Entry, unsigned UF,
+                           unsigned VF, PhiVector *PV);
 
   /// Insert the new loop to the loop hierarchy and pass manager
   /// and update the analysis passes.
@@ -404,7 +401,7 @@ protected:
   /// scalarized instruction behind an if block predicated on the control
   /// dependence of the instruction.
   virtual void scalarizeInstruction(Instruction *Instr,
-                                    bool IfPredicateStore=false);
+                                    bool IfPredicateStore = false);
 
   /// Vectorize Load and Store instructions,
   virtual void vectorizeMemoryInstruction(Instruction *Instr);
@@ -554,11 +551,11 @@ protected:
   BasicBlock *LoopScalarPreHeader;
   /// Middle Block between the vector and the scalar.
   BasicBlock *LoopMiddleBlock;
-  ///The ExitBlock of the scalar loop.
+  /// The ExitBlock of the scalar loop.
   BasicBlock *LoopExitBlock;
-  ///The vector loop body.
+  /// The vector loop body.
   SmallVector<BasicBlock *, 4> LoopVectorBody;
-  ///The scalar loop body.
+  /// The scalar loop body.
   BasicBlock *LoopScalarBody;
   /// A list of all bypass blocks. The first block is the entry of the loop.
   SmallVector<BasicBlock *, 4> LoopBypassBlocks;
@@ -571,7 +568,7 @@ protected:
   ValueMap WidenMap;
   /// Store instructions that should be predicated, as a pair
   ///   <StoreInst, Predicate>
-  SmallVector<std::pair<StoreInst*,Value*>, 4> PredicatedStores;
+  SmallVector<std::pair<StoreInst *, Value *>, 4> PredicatedStores;
   EdgeMaskCache MaskCache;
   /// Trip count of the original loop.
   Value *TripCount;
@@ -581,7 +578,7 @@ protected:
   /// Map of scalar integer values to the smallest bitwidth they can be legally
   /// represented as. The vector equivalents of these values should be truncated
   /// to this type.
-  MapVector<Instruction*,uint64_t> MinBWs;
+  MapVector<Instruction *, uint64_t> MinBWs;
   LoopVectorizationLegality *Legal;
 
   // Record whether runtime check is added.
@@ -665,10 +662,8 @@ static void propagateMetadata(Instruction *To, const Instruction *From) {
     // on the condition, and thus actually aliased with some other
     // non-speculated memory access when the condition was false, this would be
     // caught by the runtime overlap checks).
-    if (Kind != LLVMContext::MD_tbaa &&
-        Kind != LLVMContext::MD_alias_scope &&
-        Kind != LLVMContext::MD_noalias &&
-        Kind != LLVMContext::MD_fpmath &&
+    if (Kind != LLVMContext::MD_tbaa && Kind != LLVMContext::MD_alias_scope &&
+        Kind != LLVMContext::MD_noalias && Kind != LLVMContext::MD_fpmath &&
         Kind != LLVMContext::MD_nontemporal)
       continue;
 
@@ -934,20 +929,16 @@ private:
 /// for example 'force', means a decision has been made. So, we need to be
 /// careful NOT to add them if the user hasn't specifically asked so.
 class LoopVectorizeHints {
-  enum HintKind {
-    HK_WIDTH,
-    HK_UNROLL,
-    HK_FORCE
-  };
+  enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE };
 
   /// Hint - associates name and validation with the hint value.
   struct Hint {
-    const char * Name;
+    const char *Name;
     unsigned Value; // This may have to change for non-numeric values.
     HintKind Kind;
 
-    Hint(const char * Name, unsigned Value, HintKind Kind)
-      : Name(Name), Value(Value), Kind(Kind) { }
+    Hint(const char *Name, unsigned Value, HintKind Kind)
+        : Name(Name), Value(Value), Kind(Kind) {}
 
     bool validate(unsigned Val) {
       switch (Kind) {
@@ -1094,9 +1085,7 @@ public:
     return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe;
   }
 
-  void setPotentiallyUnsafe() {
-    PotentiallyUnsafe = true;
-  }
+  void setPotentiallyUnsafe() { PotentiallyUnsafe = true; }
 
 private:
   /// Find hints specified in the loop metadata and update local values.
@@ -1143,7 +1132,8 @@ private:
     Name = Name.substr(Prefix().size(), StringRef::npos);
 
     const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
-    if (!C) return;
+    if (!C)
+      return;
     unsigned Val = C->getZExtValue();
 
     Hint *Hints[] = {&Width, &Interleave, &Force};
@@ -1169,7 +1159,7 @@ private:
 
   /// Matches metadata with hint name.
   bool matchesHintMetadataName(MDNode *Node, ArrayRef<Hint> HintTypes) {
-    MDString* Name = dyn_cast<MDString>(Node->getOperand(0));
+    MDString *Name = dyn_cast<MDString>(Node->getOperand(0));
     if (!Name)
       return false;
 
@@ -1271,7 +1261,7 @@ public:
 
   /// InductionList saves induction variables and maps them to the
   /// induction descriptor.
-  typedef MapVector<PHINode*, InductionDescriptor> InductionList;
+  typedef MapVector<PHINode *, InductionDescriptor> InductionList;
 
   /// RecurrenceSet contains the phi nodes that are recurrences other than
   /// inductions and reductions.
@@ -1324,16 +1314,14 @@ public:
   bool isUniform(Value *V);
 
   /// Returns true if this instruction will remain scalar after vectorization.
-  bool isUniformAfterVectorization(Instruction* I) { return Uniforms.count(I); }
+  bool isUniformAfterVectorization(Instruction *I) { return Uniforms.count(I); }
 
   /// Returns the information that we collected about runtime memory check.
   const RuntimePointerChecking *getRuntimePointerChecking() const {
     return LAI->getRuntimePointerChecking();
   }
 
-  const LoopAccessInfo *getLAI() const {
-    return LAI;
-  }
+  const LoopAccessInfo *getLAI() const { return LAI; }
 
   /// \brief Check if \p Instr belongs to any interleaved access group.
   bool isAccessInterleaved(Instruction *Instr) {
@@ -1383,18 +1371,11 @@ public:
 
   /// Returns true if vector representation of the instruction \p I
   /// requires mask.
-  bool isMaskRequired(const Instruction* I) {
-    return (MaskedOp.count(I) != 0);
-  }
-  unsigned getNumStores() const {
-    return LAI->getNumStores();
-  }
-  unsigned getNumLoads() const {
-    return LAI->getNumLoads();
-  }
-  unsigned getNumPredStores() const {
-    return NumPredStores;
-  }
+  bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); }
+  unsigned getNumStores() const { return LAI->getNumStores(); }
+  unsigned getNumLoads() const { return LAI->getNumLoads(); }
+  unsigned getNumPredStores() const { return NumPredStores; }
+
 private:
   /// Check if a single basic block loop is vectorizable.
   /// At this point we know that this is a loop with a constant trip count
@@ -1479,10 +1460,10 @@ private:
 
   /// Allowed outside users. This holds the reduction
   /// vars which can be accessed from outside the loop.
-  SmallPtrSet<Value*, 4> AllowedExit;
+  SmallPtrSet<Value *, 4> AllowedExit;
   /// This set holds the variables which are known to be uniform after
   /// vectorization.
-  SmallPtrSet<Instruction*, 4> Uniforms;
+  SmallPtrSet<Instruction *, 4> Uniforms;
 
   /// Can we assume the absence of NaNs.
   bool HasFunNoNaNAttr;
@@ -1513,9 +1494,9 @@ public:
   LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI,
                              LoopVectorizationLegality *Legal,
                              const TargetTransformInfo &TTI,
-                             const TargetLibraryInfo *TLI,
-                             DemandedBits *DB, AssumptionCache *AC,
-                             const Function *F, const LoopVectorizeHints *Hints,
+                             const TargetLibraryInfo *TLI, DemandedBits *DB,
+                             AssumptionCache *AC, const Function *F,
+                             const LoopVectorizeHints *Hints,
                              SmallPtrSetImpl<const Value *> &ValuesToIgnore)
       : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB),
         TheFunction(F), Hints(Hints), ValuesToIgnore(ValuesToIgnore) {}
@@ -1523,7 +1504,7 @@ public:
   /// Information about vectorization costs
   struct VectorizationFactor {
     unsigned Width; // Vector width with best cost
-    unsigned Cost; // Cost of the loop with that width
+    unsigned Cost;  // Cost of the loop with that width
   };
   /// \return The most profitable vectorization factor and the cost of that VF.
   /// This method checks every power of two up to VF. If UserVF is not ZERO
@@ -1567,8 +1548,10 @@ public:
 
 private:
   /// The vectorization cost is a combination of the cost itself and a boolean
-  /// indicating whether any of the contributing operations will actually operate on
-  /// vector values after type legalization in the backend. If this latter value is
+  /// indicating whether any of the contributing operations will actually
+  /// operate on
+  /// vector values after type legalization in the backend. If this latter value
+  /// is
   /// false, then all operations will be scalarized (i.e. no vectorization has
   /// actually taken place).
   typedef std::pair<unsigned, bool> VectorizationCostTy;
@@ -1603,7 +1586,7 @@ public:
   /// Map of scalar integer values to the smallest bitwidth they can be legally
   /// represented as. The vector equivalents of these values should be truncated
   /// to this type.
-  MapVector<Instruction*,uint64_t> MinBWs;
+  MapVector<Instruction *, uint64_t> MinBWs;
 
   /// The loop that we evaluate.
   Loop *TheLoop;
@@ -1699,9 +1682,8 @@ struct LoopVectorize : public FunctionPass {
   static char ID;
 
   explicit LoopVectorize(bool NoUnrolling = false, bool AlwaysVectorize = true)
-    : FunctionPass(ID),
-      DisableUnrolling(NoUnrolling),
-      AlwaysVectorize(AlwaysVectorize) {
+      : FunctionPass(ID), DisableUnrolling(NoUnrolling),
+        AlwaysVectorize(AlwaysVectorize) {
     initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
   }
 
@@ -1823,7 +1805,8 @@ struct LoopVectorize : public FunctionPass {
                          ? "disabled"
                          : (Hints.getForce() == LoopVectorizeHints::FK_Enabled
                                 ? "enabled"
-                                : "?")) << " width=" << Hints.getWidth()
+                                : "?"))
+                 << " width=" << Hints.getWidth()
                  << " unroll=" << Hints.getInterleave() << "\n");
 
     // Function containing loop
@@ -1887,8 +1870,8 @@ struct LoopVectorize : public FunctionPass {
 
     // Check the function attributes to find out if this function should be
     // optimized for size.
-    bool OptForSize = Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
-                      F->optForSize();
+    bool OptForSize =
+        Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize();
 
     // Compute the weighted frequency of this loop being executed and see if it
     // is less than 20% of the function entry baseline frequency. Note that we
@@ -1908,7 +1891,7 @@ struct LoopVectorize : public FunctionPass {
     // vector instructions?
     if (F->hasFnAttribute(Attribute::NoImplicitFloat)) {
       DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat"
-            "attribute is used.\n");
+                      "attribute is used.\n");
       emitAnalysisDiag(
           F, L, Hints,
           VectorizationReport()
@@ -1924,10 +1907,9 @@ struct LoopVectorize : public FunctionPass {
     if (Hints.isPotentiallyUnsafe() &&
         TTI->isFPVectorizationPotentiallyUnsafe()) {
       DEBUG(dbgs() << "LV: Potentially unsafe FP op prevents vectorization.\n");
-      emitAnalysisDiag(
-          F, L, Hints,
-          VectorizationReport()
-             << "loop not vectorized due to unsafe FP support.");
+      emitAnalysisDiag(F, L, Hints,
+                       VectorizationReport()
+                           << "loop not vectorized due to unsafe FP support.");
       emitMissedWarning(F, L, Hints);
       return false;
     }
@@ -2058,7 +2040,6 @@ struct LoopVectorize : public FunctionPass {
     AU.addPreserved<BasicAAWrapperPass>();
     AU.addPreserved<GlobalsAAWrapperPass>();
   }
-
 };
 
 } // end anonymous namespace
@@ -2071,9 +2052,9 @@ struct LoopVectorize : public FunctionPass {
 Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
   // We need to place the broadcast of invariant variables outside the loop.
   Instruction *Instr = dyn_cast<Instruction>(V);
-  bool NewInstr =
-      (Instr && std::find(LoopVectorBody.begin(), LoopVectorBody.end(),
-                          Instr->getParent()) != LoopVectorBody.end());
+  bool NewInstr = (Instr &&
+                   std::find(LoopVectorBody.begin(), LoopVectorBody.end(),
+                             Instr->getParent()) != LoopVectorBody.end());
   bool Invariant = OrigLoop->isLoopInvariant(V) && !NewInstr;
 
   // Place the code for broadcasting invariant variables in the new preheader.
@@ -2098,7 +2079,7 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
   Type *ITy = Val->getType()->getScalarType();
   VectorType *Ty = cast<VectorType>(Val->getType());
   int VLen = Ty->getNumElements();
-  SmallVector<Constant*, 8> Indices;
+  SmallVector<Constant *, 8> Indices;
 
   // Create a vector of consecutive numbers from zero to VF.
   for (int i = 0; i < VLen; ++i)
@@ -2204,7 +2185,7 @@ bool LoopVectorizationLegality::isUniform(Value *V) {
   return LAI->isUniform(V);
 }
 
-InnerLoopVectorizer::VectorParts&
+InnerLoopVectorizer::VectorParts &
 InnerLoopVectorizer::getVectorValue(Value *V) {
   assert(V != Induction && "The new induction variable should not be used.");
   assert(!V->getType()->isVectorTy() && "Can't widen a vector");
@@ -2225,7 +2206,7 @@ InnerLoopVectorizer::getVectorValue(Value *V) {
 
 Value *InnerLoopVectorizer::reverseVector(Value *Vec) {
   assert(Vec->getType()->isVectorTy() && "Invalid type");
-  SmallVector<Constant*, 8> ShuffleMask;
+  SmallVector<Constant *, 8> ShuffleMask;
   for (unsigned i = 0; i < VF; ++i)
     ShuffleMask.push_back(Builder.getInt32(VF - i - 1));
 
@@ -2518,10 +2499,10 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
   // scalarize the instruction.
   int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
   bool Reverse = ConsecutiveStride < 0;
-  bool CreateGatherScatter = !ConsecutiveStride &&
-    ((LI && Legal->isLegalMaskedGather(ScalarDataTy)) ||
-     (SI && Legal->isLegalMaskedScatter(ScalarDataTy)));
- 
+  bool CreateGatherScatter =
+      !ConsecutiveStride && ((LI && Legal->isLegalMaskedGather(ScalarDataTy)) ||
+                             (SI && Legal->isLegalMaskedScatter(ScalarDataTy)));
+
   if (!ConsecutiveStride && !CreateGatherScatter)
     return scalarizeInstruction(Instr);
 
@@ -2534,83 +2515,83 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
   if (ConsecutiveStride) {
     if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) {
       setDebugLocFromInst(Builder, Gep);
-        Value *PtrOperand = Gep->getPointerOperand();
-        Value *FirstBasePtr = getVectorValue(PtrOperand)[0];
-        FirstBasePtr = Builder.CreateExtractElement(FirstBasePtr, Zero);
-
-        // Create the new GEP with the new induction variable.
-        GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
-        Gep2->setOperand(0, FirstBasePtr);
-        Gep2->setName("gep.indvar.base");
-        Ptr = Builder.Insert(Gep2);
-      } else if (Gep) {
-       setDebugLocFromInst(Builder, Gep);
-       assert(PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getPointerOperand()),
-                                           OrigLoop) &&
-              "Base ptr must be invariant");
-        // The last index does not have to be the induction. It can be
-        // consecutive and be a function of the index. For example A[I+1];
-        unsigned NumOperands = Gep->getNumOperands();
-        unsigned InductionOperand = getGEPInductionOperand(Gep);
-        // Create the new GEP with the new induction variable.
-        GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
-
-        for (unsigned i = 0; i < NumOperands; ++i) {
-          Value *GepOperand = Gep->getOperand(i);
-          Instruction *GepOperandInst = dyn_cast<Instruction>(GepOperand);
-
-          // Update last index or loop invariant instruction anchored in loop.
-          if (i == InductionOperand ||
-              (GepOperandInst && OrigLoop->contains(GepOperandInst))) {
-            assert((i == InductionOperand ||
-                    PSE.getSE()->isLoopInvariant(PSE.getSCEV(GepOperandInst),
-                                                 OrigLoop)) &&
-                    "Must be last index or loop invariant");
-
-            VectorParts &GEPParts = getVectorValue(GepOperand);
-            Value *Index = GEPParts[0];
-            Index = Builder.CreateExtractElement(Index, Zero);
-            Gep2->setOperand(i, Index);
-            Gep2->setName("gep.indvar.idx");
-          }
+      Value *PtrOperand = Gep->getPointerOperand();
+      Value *FirstBasePtr = getVectorValue(PtrOperand)[0];
+      FirstBasePtr = Builder.CreateExtractElement(FirstBasePtr, Zero);
+
+      // Create the new GEP with the new induction variable.
+      GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+      Gep2->setOperand(0, FirstBasePtr);
+      Gep2->setName("gep.indvar.base");
+      Ptr = Builder.Insert(Gep2);
+    } else if (Gep) {
+      setDebugLocFromInst(Builder, Gep);
+      assert(PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getPointerOperand()),
+                                          OrigLoop) &&
+             "Base ptr must be invariant");
+      // The last index does not have to be the induction. It can be
+      // consecutive and be a function of the index. For example A[I+1];
+      unsigned NumOperands = Gep->getNumOperands();
+      unsigned InductionOperand = getGEPInductionOperand(Gep);
+      // Create the new GEP with the new induction variable.
+      GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+
+      for (unsigned i = 0; i < NumOperands; ++i) {
+        Value *GepOperand = Gep->getOperand(i);
+        Instruction *GepOperandInst = dyn_cast<Instruction>(GepOperand);
+
+        // Update last index or loop invariant instruction anchored in loop.
+        if (i == InductionOperand ||
+            (GepOperandInst && OrigLoop->contains(GepOperandInst))) {
+          assert((i == InductionOperand ||
+                  PSE.getSE()->isLoopInvariant(PSE.getSCEV(GepOperandInst),
+                                               OrigLoop)) &&
+                 "Must be last index or loop invariant");
+
+          VectorParts &GEPParts = getVectorValue(GepOperand);
+          Value *Index = GEPParts[0];
+          Index = Builder.CreateExtractElement(Index, Zero);
+          Gep2->setOperand(i, Index);
+          Gep2->setName("gep.indvar.idx");
         }
-        Ptr = Builder.Insert(Gep2);
-      } else { // No GEP
-        // Use the induction element ptr.
-        assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
-        setDebugLocFromInst(Builder, Ptr);
-        VectorParts &PtrVal = getVectorValue(Ptr);
-        Ptr = Builder.CreateExtractElement(PtrVal[0], Zero);
       }
-    } else {
-      // At this point we should vector version of GEP for Gather or Scatter
-      assert(CreateGatherScatter && "The instruction should be scalarized");
-      if (Gep) {
-        SmallVector<VectorParts, 4> OpsV;
-        // Vectorizing GEP, across UF parts, we want to keep each loop-invariant
-        // base or index of GEP scalar
-        for (Value *Op : Gep->operands()) {
-          if (PSE.getSE()->isLoopInvariant(PSE.getSCEV(Op), OrigLoop))
-            OpsV.push_back(VectorParts(UF, Op));
-          else
-            OpsV.push_back(getVectorValue(Op));
-        }
-
-        for (unsigned Part = 0; Part < UF; ++Part) {
-          SmallVector<Value*, 4> Ops;
-          Value *GEPBasePtr = OpsV[0][Part];
-          for (unsigned i = 1; i < Gep->getNumOperands(); i++)
-            Ops.push_back(OpsV[i][Part]);
-          Value *NewGep = Builder.CreateGEP(nullptr, GEPBasePtr, Ops,
-                                            "VectorGep");
-          assert(NewGep->getType()->isVectorTy() && "Expected vector GEP");
-          NewGep = Builder.CreateBitCast(NewGep,
-                                         VectorType::get(Ptr->getType(), VF));
-          VectorGep.push_back(NewGep);
-        }
-      } else
-        VectorGep = getVectorValue(Ptr);
+      Ptr = Builder.Insert(Gep2);
+    } else { // No GEP
+      // Use the induction element ptr.
+      assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
+      setDebugLocFromInst(Builder, Ptr);
+      VectorParts &PtrVal = getVectorValue(Ptr);
+      Ptr = Builder.CreateExtractElement(PtrVal[0], Zero);
     }
+  } else {
+    // At this point we should vector version of GEP for Gather or Scatter
+    assert(CreateGatherScatter && "The instruction should be scalarized");
+    if (Gep) {
+      SmallVector<VectorParts, 4> OpsV;
+      // Vectorizing GEP, across UF parts, we want to keep each loop-invariant
+      // base or index of GEP scalar
+      for (Value *Op : Gep->operands()) {
+        if (PSE.getSE()->isLoopInvariant(PSE.getSCEV(Op), OrigLoop))
+          OpsV.push_back(VectorParts(UF, Op));
+        else
+          OpsV.push_back(getVectorValue(Op));
+      }
+
+      for (unsigned Part = 0; Part < UF; ++Part) {
+        SmallVector<Value *, 4> Ops;
+        Value *GEPBasePtr = OpsV[0][Part];
+        for (unsigned i = 1; i < Gep->getNumOperands(); i++)
+          Ops.push_back(OpsV[i][Part]);
+        Value *NewGep =
+            Builder.CreateGEP(nullptr, GEPBasePtr, Ops, "VectorGep");
+        assert(NewGep->getType()->isVectorTy() && "Expected vector GEP");
+        NewGep =
+            Builder.CreateBitCast(NewGep, VectorType::get(Ptr->getType(), VF));
+        VectorGep.push_back(NewGep);
+      }
+    } else
+      VectorGep = getVectorValue(Ptr);
+  }
 
   VectorParts Mask = createBlockInMask(Instr->getParent());
   // Handle Stores:
@@ -2631,7 +2612,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
       } else {
         // Calculate the pointer for the specific unroll-part.
         Value *PartPtr =
-          Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF));
+            Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF));
 
         if (Reverse) {
           // If we store to reverse consecutive memory locations, then we need
@@ -2639,20 +2620,22 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
           StoredVal[Part] = reverseVector(StoredVal[Part]);
           // If the address is consecutive but reversed, then the
           // wide store needs to start at the last vector element.
-          PartPtr = Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF));
-          PartPtr = Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF));
+          PartPtr =
+              Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF));
+          PartPtr =
+              Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF));
           Mask[Part] = reverseVector(Mask[Part]);
         }
 
-        Value *VecPtr = Builder.CreateBitCast(PartPtr,
-                                              DataTy->getPointerTo(AddressSpace));
+        Value *VecPtr =
+            Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
 
         if (Legal->isMaskRequired(SI))
           NewSI = Builder.CreateMaskedStore(StoredVal[Part], VecPtr, Alignment,
                                             Mask[Part]);
-        else 
-          NewSI = Builder.CreateAlignedStore(StoredVal[Part], VecPtr,
-                                             Alignment);
+        else
+          NewSI =
+              Builder.CreateAlignedStore(StoredVal[Part], VecPtr, Alignment);
       }
       addMetadata(NewSI, SI);
     }
@@ -2663,16 +2646,16 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
   assert(LI && "Must have a load instruction");
   setDebugLocFromInst(Builder, LI);
   for (unsigned Part = 0; Part < UF; ++Part) {
-    Instruction* NewLI;
+    Instruction *NewLI;
     if (CreateGatherScatter) {
       Value *MaskPart = Legal->isMaskRequired(LI) ? Mask[Part] : nullptr;
-      NewLI = Builder.CreateMaskedGather(VectorGep[Part], Alignment,
-                                         MaskPart, 0, "wide.masked.gather");
+      NewLI = Builder.CreateMaskedGather(VectorGep[Part], Alignment, MaskPart,
+                                         0, "wide.masked.gather");
       Entry[Part] = NewLI;
     } else {
       // Calculate the pointer for the specific unroll-part.
       Value *PartPtr =
-        Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF));
+          Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF));
 
       if (Reverse) {
         // If the address is consecutive but reversed, then the
@@ -2682,15 +2665,15 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
         Mask[Part] = reverseVector(Mask[Part]);
       }
 
-      Value *VecPtr = Builder.CreateBitCast(PartPtr,
-                                            DataTy->getPointerTo(AddressSpace));
+      Value *VecPtr =
+          Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
       if (Legal->isMaskRequired(LI))
         NewLI = Builder.CreateMaskedLoad(VecPtr, Alignment, Mask[Part],
                                          UndefValue::get(DataTy),
                                          "wide.masked.load");
       else
         NewLI = Builder.CreateAlignedLoad(VecPtr, Alignment, "wide.load");
-      Entry[Part] = Reverse ? reverseVector(NewLI) :  NewLI;
+      Entry[Part] = Reverse ? reverseVector(NewLI) : NewLI;
     }
     addMetadata(NewLI, LI);
   }
@@ -2737,8 +2720,9 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
   // Does this instruction return a value ?
   bool IsVoidRetTy = Instr->getType()->isVoidTy();
 
-  Value *UndefVec = IsVoidRetTy ? nullptr :
-    UndefValue::get(VectorType::get(Instr->getType(), VF));
+  Value *UndefVec =
+      IsVoidRetTy ? nullptr
+                  : UndefValue::get(VectorType::get(Instr->getType(), VF));
   // Create a new entry in the WidenMap and initialize it to Undef or Null.
   VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
 
@@ -2791,8 +2775,8 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
                                                        Builder.getInt32(Width));
       // End if-block.
       if (IfPredicateStore)
-        PredicatedStores.push_back(std::make_pair(cast<StoreInst>(Cloned),
-                                                  Cmp));
+        PredicatedStores.push_back(
+            std::make_pair(cast<StoreInst>(Cloned), Cmp));
     }
   }
 }
@@ -2866,9 +2850,8 @@ Value *InnerLoopVectorizer::getOrCreateTripCount(Loop *L) {
 
   if (TripCount->getType()->isPointerTy())
     TripCount =
-      CastInst::CreatePointerCast(TripCount, IdxTy,
-                                  "exitcount.ptrcnt.to.int",
-                                  L->getLoopPreheader()->getTerminator());
+        CastInst::CreatePointerCast(TripCount, IdxTy, "exitcount.ptrcnt.to.int",
+                                    L->getLoopPreheader()->getTerminator());
 
   return TripCount;
 }
@@ -2913,13 +2896,11 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
 
   // Generate code to check that the loop's trip count that we computed by
   // adding one to the backedge-taken count will not overflow.
-  Value *CheckMinIters =
-    Builder.CreateICmpULT(Count,
-                          ConstantInt::get(Count->getType(), VF * UF),
-                          "min.iters.check");
+  Value *CheckMinIters = Builder.CreateICmpULT(
+      Count, ConstantInt::get(Count->getType(), VF * UF), "min.iters.check");
 
-  BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(),
-                                          "min.iters.checked");
+  BasicBlock *NewBB =
+      BB->splitBasicBlock(BB->getTerminator(), "min.iters.checked");
   // Update dominator tree immediately if the generated block is a
   // LoopBypassBlock because SCEV expansions to generate loop bypass
   // checks may query it before the current function is finished.
@@ -2944,8 +2925,7 @@ void InnerLoopVectorizer::emitVectorLoopEnteredCheck(Loop *L,
 
   // Generate code to check that the loop's trip count that we computed by
   // adding one to the backedge-taken count will not overflow.
-  BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(),
-                                          "vector.ph");
+  BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
   // Update dominator tree immediately if the generated block is a
   // LoopBypassBlock because SCEV expansions to generate loop bypass
   // checks may query it before the current function is finished.
@@ -2987,8 +2967,7 @@ void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
   AddedSafetyChecks = true;
 }
 
-void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L,
-                                               BasicBlock *Bypass) {
+void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
   BasicBlock *BB = L->getLoopPreheader();
 
   // Generate the code that checks in runtime if arrays overlap. We put the
@@ -3022,7 +3001,6 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L,
   LVer->prepareNoAliasMetadata();
 }
 
-
 void InnerLoopVectorizer::createEmptyLoop() {
   /*
    In this function we generate a new loop. The new loop will contain
@@ -3080,12 +3058,12 @@ void InnerLoopVectorizer::createEmptyLoop() {
   BasicBlock *VecBody =
       VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body");
   BasicBlock *MiddleBlock =
-  VecBody->splitBasicBlock(VecBody->getTerminator(), "middle.block");
+      VecBody->splitBasicBlock(VecBody->getTerminator(), "middle.block");
   BasicBlock *ScalarPH =
-  MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(), "scalar.ph");
+      MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(), "scalar.ph");
 
   // Create and register the new vector loop.
-  Loop* Lp = new Loop();
+  Loop *Lp = new Loop();
   Loop *ParentLoop = OrigLoop->getParentLoop();
 
   // Insert the new loop into the loop nest and register the new basic blocks
@@ -3127,8 +3105,8 @@ void InnerLoopVectorizer::createEmptyLoop() {
   Value *CountRoundDown = getOrCreateVectorTripCount(Lp);
   Constant *Step = ConstantInt::get(IdxTy, VF * UF);
   Induction =
-    createInductionVariable(Lp, StartIdx, CountRoundDown, Step,
-                            getDebugLocFromInstOrOperands(OldInduction));
+      createInductionVariable(Lp, StartIdx, CountRoundDown, Step,
+                              getDebugLocFromInstOrOperands(OldInduction));
 
   // We are going to resume the execution of the scalar loop.
   // Go over all of the induction variables that we found and fix the
@@ -3148,18 +3126,16 @@ void InnerLoopVectorizer::createEmptyLoop() {
     InductionDescriptor II = I->second;
 
     // Create phi nodes to merge from the  backedge-taken check block.
-    PHINode *BCResumeVal = PHINode::Create(OrigPhi->getType(), 3,
-                                           "bc.resume.val",
-                                           ScalarPH->getTerminator());
+    PHINode *BCResumeVal = PHINode::Create(
+        OrigPhi->getType(), 3, "bc.resume.val", ScalarPH->getTerminator());
     Value *EndValue;
     if (OrigPhi == OldInduction) {
       // We know what the end value is.
       EndValue = CountRoundDown;
     } else {
       IRBuilder<> B(LoopBypassBlocks.back()->getTerminator());
-      Value *CRD = B.CreateSExtOrTrunc(CountRoundDown,
-                                       II.getStepValue()->getType(),
-                                       "cast.crd");
+      Value *CRD = B.CreateSExtOrTrunc(
+          CountRoundDown, II.getStepValue()->getType(), "cast.crd");
       EndValue = II.transform(B, CRD);
       EndValue->setName("ind.end");
     }
@@ -3181,9 +3157,9 @@ void InnerLoopVectorizer::createEmptyLoop() {
   // Add a check in the middle block to see if we have completed
   // all of the iterations in the first vector loop.
   // If (N - N%VF) == N, then we *don't* need to run the remainder.
-  Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count,
-                                CountRoundDown, "cmp.n",
-                                MiddleBlock->getTerminator());
+  Value *CmpN =
+      CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count,
+                      CountRoundDown, "cmp.n", MiddleBlock->getTerminator());
   ReplaceInstWithInst(MiddleBlock->getTerminator(),
                       BranchInst::Create(ExitBlock, ScalarPH, CmpN));
 
@@ -3238,9 +3214,7 @@ struct CSEDenseMapInfo {
 /// = ...;  " blocks. We start with one vectorized basic block. For every
 /// conditional block we split this vectorized block. Therefore, every second
 /// block will be a predicated one.
-static bool isPredicatedBlock(unsigned BlockNum) {
-  return BlockNum % 2;
-}
+static bool isPredicatedBlock(unsigned BlockNum) { return BlockNum % 2; }
 
 ///\brief Perform cse of induction variable instructions.
 static void cse(SmallVector<BasicBlock *, 4> &BBs) {
@@ -3274,7 +3248,7 @@ static void cse(SmallVector<BasicBlock *, 4> &BBs) {
 
 /// \brief Adds a 'fast' flag to floating point operations.
 static Value *addFastMathFlag(Value *V) {
-  if (isa<FPMathOperator>(V)){
+  if (isa<FPMathOperator>(V)) {
     FastMathFlags Flags;
     Flags.setUnsafeAlgebra();
     cast<Instruction>(V)->setFastMathFlags(Flags);
@@ -3397,8 +3371,8 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
       if (I->use_empty())
         continue;
       Type *OriginalTy = I->getType();
-      Type *ScalarTruncatedTy = IntegerType::get(OriginalTy->getContext(),
-                                                 KV.second);
+      Type *ScalarTruncatedTy =
+          IntegerType::get(OriginalTy->getContext(), KV.second);
       Type *TruncatedTy = VectorType::get(ScalarTruncatedTy,
                                           OriginalTy->getVectorNumElements());
       if (TruncatedTy == OriginalTy)
@@ -3408,7 +3382,7 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
         continue;
 
       IRBuilder<> B(cast<Instruction>(I));
-      auto ShrinkOperand = [&](Value *V) -> Value* {
+      auto ShrinkOperand = [&](Value *V) -> Value * {
         if (auto *ZI = dyn_cast<ZExtInst>(V))
           if (ZI->getSrcTy() == TruncatedTy)
             return ZI->getOperand(0);
@@ -3419,44 +3393,42 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
       // unfortunately.
       Value *NewI = nullptr;
       if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
-        NewI = B.CreateBinOp(BO->getOpcode(),
-                             ShrinkOperand(BO->getOperand(0)),
+        NewI = B.CreateBinOp(BO->getOpcode(), ShrinkOperand(BO->getOperand(0)),
                              ShrinkOperand(BO->getOperand(1)));
         cast<BinaryOperator>(NewI)->copyIRFlags(I);
       } else if (ICmpInst *CI = dyn_cast<ICmpInst>(I)) {
-        NewI = B.CreateICmp(CI->getPredicate(),
-                            ShrinkOperand(CI->getOperand(0)),
-                            ShrinkOperand(CI->getOperand(1)));
+        NewI =
+            B.CreateICmp(CI->getPredicate(), ShrinkOperand(CI->getOperand(0)),
+                         ShrinkOperand(CI->getOperand(1)));
       } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
         NewI = B.CreateSelect(SI->getCondition(),
                               ShrinkOperand(SI->getTrueValue()),
                               ShrinkOperand(SI->getFalseValue()));
       } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
         switch (CI->getOpcode()) {
-        default: llvm_unreachable("Unhandled cast!");
+        default:
+          llvm_unreachable("Unhandled cast!");
         case Instruction::Trunc:
           NewI = ShrinkOperand(CI->getOperand(0));
           break;
         case Instruction::SExt:
-          NewI = B.CreateSExtOrTrunc(CI->getOperand(0),
-                                     smallestIntegerVectorType(OriginalTy,
-                                                               TruncatedTy));
+          NewI = B.CreateSExtOrTrunc(
+              CI->getOperand(0),
+              smallestIntegerVectorType(OriginalTy, TruncatedTy));
           break;
         case Instruction::ZExt:
-          NewI = B.CreateZExtOrTrunc(CI->getOperand(0),
-                                     smallestIntegerVectorType(OriginalTy,
-                                                               TruncatedTy));
+          NewI = B.CreateZExtOrTrunc(
+              CI->getOperand(0),
+              smallestIntegerVectorType(OriginalTy, TruncatedTy));
           break;
         }
       } else if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(I)) {
         auto Elements0 = SI->getOperand(0)->getType()->getVectorNumElements();
-        auto *O0 =
-          B.CreateZExtOrTrunc(SI->getOperand(0),
-                              VectorType::get(ScalarTruncatedTy, Elements0));
+        auto *O0 = B.CreateZExtOrTrunc(
+            SI->getOperand(0), VectorType::get(ScalarTruncatedTy, Elements0));
         auto Elements1 = SI->getOperand(1)->getType()->getVectorNumElements();
-        auto *O1 =
-          B.CreateZExtOrTrunc(SI->getOperand(1),
-                              VectorType::get(ScalarTruncatedTy, Elements1));
+        auto *O1 = B.CreateZExtOrTrunc(
+            SI->getOperand(1), VectorType::get(ScalarTruncatedTy, Elements1));
 
         NewI = B.CreateShuffleVector(O0, O1, SI->getMask());
       } else if (isa<LoadInst>(I)) {
@@ -3525,8 +3497,8 @@ void InnerLoopVectorizer::vectorizeLoop() {
   DFS.perform(LI);
 
   // Vectorize all of the blocks in the original loop.
-  for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(),
-       be = DFS.endRPO(); bb != be; ++bb)
+  for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(), be = DFS.endRPO();
+       bb != be; ++bb)
     vectorizeBlockInLoop(*bb, &PHIsToFix);
 
   // Insert truncates and extends for any truncated instructions as hints to
@@ -3614,10 +3586,10 @@ void InnerLoopVectorizer::vectorizeLoop() {
       // Make sure to add the reduction stat value only to the
       // first unroll part.
       Value *StartVal = (part == 0) ? VectorStart : Identity;
-      cast<PHINode>(VecRdxPhi[part])->addIncoming(StartVal,
-                                                  LoopVectorPreHeader);
-      cast<PHINode>(VecRdxPhi[part])->addIncoming(Val[part],
-                                                  LoopVectorBody.back());
+      cast<PHINode>(VecRdxPhi[part])
+          ->addIncoming(StartVal, LoopVectorPreHeader);
+      cast<PHINode>(VecRdxPhi[part])
+          ->addIncoming(Val[part], LoopVectorBody.back());
     }
 
     // Before each round, move the insertion point right between
@@ -3675,21 +3647,19 @@ void InnerLoopVectorizer::vectorizeLoop() {
       assert(isPowerOf2_32(VF) &&
              "Reduction emission only supported for pow2 vectors!");
       Value *TmpVec = ReducedPartRdx;
-      SmallVector<Constant*, 32> ShuffleMask(VF, nullptr);
+      SmallVector<Constant *, 32> ShuffleMask(VF, nullptr);
       for (unsigned i = VF; i != 1; i >>= 1) {
         // Move the upper half of the vector to the lower half.
-        for (unsigned j = 0; j != i/2; ++j)
-          ShuffleMask[j] = Builder.getInt32(i/2 + j);
+        for (unsigned j = 0; j != i / 2; ++j)
+          ShuffleMask[j] = Builder.getInt32(i / 2 + j);
 
         // Fill the rest of the mask with undef.
-        std::fill(&ShuffleMask[i/2], ShuffleMask.end(),
+        std::fill(&ShuffleMask[i / 2], ShuffleMask.end(),
                   UndefValue::get(Builder.getInt32Ty()));
 
-        Value *Shuf =
-        Builder.CreateShuffleVector(TmpVec,
-                                    UndefValue::get(TmpVec->getType()),
-                                    ConstantVector::get(ShuffleMask),
-                                    "rdx.shuf");
+        Value *Shuf = Builder.CreateShuffleVector(
+            TmpVec, UndefValue::get(TmpVec->getType()),
+            ConstantVector::get(ShuffleMask), "rdx.shuf");
 
         if (Op != Instruction::ICmp && Op != Instruction::FCmp)
           // Floating point operations had to be 'fast' to enable the reduction.
@@ -3701,8 +3671,8 @@ void InnerLoopVectorizer::vectorizeLoop() {
       }
 
       // The result is in the first element of the vector.
-      ReducedPartRdx = Builder.CreateExtractElement(TmpVec,
-                                                    Builder.getInt32(0));
+      ReducedPartRdx =
+          Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
 
       // If the reduction can be performed in a smaller type, we need to extend
       // the reduction to the wider type before we branch to the original loop.
@@ -3726,9 +3696,11 @@ void InnerLoopVectorizer::vectorizeLoop() {
     // We know that the loop is in LCSSA form. We need to update the
     // PHI nodes in the exit blocks.
     for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
-         LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) {
+                              LEE = LoopExitBlock->end();
+         LEI != LEE; ++LEI) {
       PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI);
-      if (!LCSSAPhi) break;
+      if (!LCSSAPhi)
+        break;
 
       // All PHINodes need to have a single entry edge, or two if
       // we already fixed them.
@@ -3741,7 +3713,7 @@ void InnerLoopVectorizer::vectorizeLoop() {
         LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
         break;
       }
-    }// end of the LCSSA phi scan.
+    } // end of the LCSSA phi scan.
 
     // Fix the scalar loop reduction variable with the incoming reduction sum
     // from the vector body and from the backedge value.
@@ -3928,9 +3900,11 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
 
 void InnerLoopVectorizer::fixLCSSAPHIs() {
   for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
-       LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) {
+                            LEE = LoopExitBlock->end();
+       LEI != LEE; ++LEI) {
     PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI);
-    if (!LCSSAPhi) break;
+    if (!LCSSAPhi)
+      break;
     if (LCSSAPhi->getNumIncomingValues() == 1)
       LCSSAPhi->addIncoming(UndefValue::get(LCSSAPhi->getType()),
                             LoopMiddleBlock);
@@ -3943,7 +3917,7 @@ InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
          "Invalid edge");
 
   // Look for cached value.
-  std::pair<BasicBlock*, BasicBlock*> Edge(Src, Dst);
+  std::pair<BasicBlock *, BasicBlock *> Edge(Src, Dst);
   EdgeMaskCache::iterator ECEntryIt = MaskCache.find(Edge);
   if (ECEntryIt != MaskCache.end())
     return ECEntryIt->second;
@@ -3999,13 +3973,13 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
 void InnerLoopVectorizer::widenPHIInstruction(
     Instruction *PN, InnerLoopVectorizer::VectorParts &Entry, unsigned UF,
     unsigned VF, PhiVector *PV) {
-  PHINode* P = cast<PHINode>(PN);
+  PHINode *P = cast<PHINode>(PN);
   // Handle recurrences.
   if (Legal->isReductionVariable(P) || Legal->isFirstOrderRecurrence(P)) {
     for (unsigned part = 0; part < UF; ++part) {
       // This is phase one of vectorizing PHIs.
-      Type *VecTy = (VF == 1) ? PN->getType() :
-      VectorType::get(PN->getType(), VF);
+      Type *VecTy =
+          (VF == 1) ? PN->getType() : VectorType::get(PN->getType(), VF);
       Entry[part] = PHINode::Create(
           VecTy, 2, "vec.phi", &*LoopVectorBody.back()->getFirstInsertionPt());
     }
@@ -4030,21 +4004,20 @@ void InnerLoopVectorizer::widenPHIInstruction(
     //      SELECT(Mask2, In2,
     //                   ( ...)))
     for (unsigned In = 0; In < NumIncoming; In++) {
-      VectorParts Cond = createEdgeMask(P->getIncomingBlock(In),
-                                        P->getParent());
+      VectorParts Cond =
+          createEdgeMask(P->getIncomingBlock(In), P->getParent());
       VectorParts &In0 = getVectorValue(P->getIncomingValue(In));
 
       for (unsigned part = 0; part < UF; ++part) {
         // We might have single edge PHIs (blocks) - use an identity
         // 'select' for the first PHI operand.
         if (In == 0)
-          Entry[part] = Builder.CreateSelect(Cond[part], In0[part],
-                                             In0[part]);
+          Entry[part] = Builder.CreateSelect(Cond[part], In0[part], In0[part]);
         else
           // Select between the current value and the previous incoming edge
           // based on the incoming mask.
-          Entry[part] = Builder.CreateSelect(Cond[part], In0[part],
-                                             Entry[part], "predphi");
+          Entry[part] = Builder.CreateSelect(Cond[part], In0[part], Entry[part],
+                                             "predphi");
       }
     }
     return;
@@ -4052,67 +4025,64 @@ void InnerLoopVectorizer::widenPHIInstruction(
 
   // This PHINode must be an induction variable.
   // Make sure that we know about it.
-  assert(Legal->getInductionVars()->count(P) &&
-         "Not an induction variable");
+  assert(Legal->getInductionVars()->count(P) && "Not an induction variable");
 
   InductionDescriptor II = Legal->getInductionVars()->lookup(P);
 
   // FIXME: The newly created binary instructions should contain nsw/nuw flags,
   // which can be found from the original scalar operations.
   switch (II.getKind()) {
-    case InductionDescriptor::IK_NoInduction:
-      llvm_unreachable("Unknown induction");
-    case InductionDescriptor::IK_IntInduction: {
-      assert(P->getType() == II.getStartValue()->getType() &&
-             "Types must match");
-      // Handle other induction variables that are now based on the
-      // canonical one.
-      Value *V = Induction;
-      if (P != OldInduction) {
-        V = Builder.CreateSExtOrTrunc(Induction, P->getType());
-        V = II.transform(Builder, V);
-        V->setName("offset.idx");
-      }
-      Value *Broadcasted = getBroadcastInstrs(V);
-      // After broadcasting the induction variable we need to make the vector
-      // consecutive by adding 0, 1, 2, etc.
-      for (unsigned part = 0; part < UF; ++part)
-        Entry[part] = getStepVector(Broadcasted, VF * part, II.getStepValue());
-      return;
+  case InductionDescriptor::IK_NoInduction:
+    llvm_unreachable("Unknown induction");
+  case InductionDescriptor::IK_IntInduction: {
+    assert(P->getType() == II.getStartValue()->getType() && "Types must match");
+    // Handle other induction variables that are now based on the
+    // canonical one.
+    Value *V = Induction;
+    if (P != OldInduction) {
+      V = Builder.CreateSExtOrTrunc(Induction, P->getType());
+      V = II.transform(Builder, V);
+      V->setName("offset.idx");
     }
-    case InductionDescriptor::IK_PtrInduction:
-      // Handle the pointer induction variable case.
-      assert(P->getType()->isPointerTy() && "Unexpected type.");
-      // This is the normalized GEP that starts counting at zero.
-      Value *PtrInd = Induction;
-      PtrInd = Builder.CreateSExtOrTrunc(PtrInd, II.getStepValue()->getType());
-      // This is the vector of results. Notice that we don't generate
-      // vector geps because scalar geps result in better code.
-      for (unsigned part = 0; part < UF; ++part) {
-        if (VF == 1) {
-          int EltIndex = part;
-          Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex);
-          Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
-          Value *SclrGep = II.transform(Builder, GlobalIdx);
-          SclrGep->setName("next.gep");
-          Entry[part] = SclrGep;
-          continue;
-        }
+    Value *Broadcasted = getBroadcastInstrs(V);
+    // After broadcasting the induction variable we need to make the vector
+    // consecutive by adding 0, 1, 2, etc.
+    for (unsigned part = 0; part < UF; ++part)
+      Entry[part] = getStepVector(Broadcasted, VF * part, II.getStepValue());
+    return;
+  }
+  case InductionDescriptor::IK_PtrInduction:
+    // Handle the pointer induction variable case.
+    assert(P->getType()->isPointerTy() && "Unexpected type.");
+    // This is the normalized GEP that starts counting at zero.
+    Value *PtrInd = Induction;
+    PtrInd = Builder.CreateSExtOrTrunc(PtrInd, II.getStepValue()->getType());
+    // This is the vector of results. Notice that we don't generate
+    // vector geps because scalar geps result in better code.
+    for (unsigned part = 0; part < UF; ++part) {
+      if (VF == 1) {
+        int EltIndex = part;
+        Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex);
+        Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
+        Value *SclrGep = II.transform(Builder, GlobalIdx);
+        SclrGep->setName("next.gep");
+        Entry[part] = SclrGep;
+        continue;
+      }
 
-        Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
-        for (unsigned int i = 0; i < VF; ++i) {
-          int EltIndex = i + part * VF;
-          Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex);
-          Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
-          Value *SclrGep = II.transform(Builder, GlobalIdx);
-          SclrGep->setName("next.gep");
-          VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
-                                               Builder.getInt32(i),
-                                               "insert.gep");
-        }
-        Entry[part] = VecVal;
+      Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
+      for (unsigned int i = 0; i < VF; ++i) {
+        int EltIndex = i + part * VF;
+        Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex);
+        Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
+        Value *SclrGep = II.transform(Builder, GlobalIdx);
+        SclrGep->setName("next.gep");
+        VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
+                                             Builder.getInt32(i), "insert.gep");
       }
-      return;
+      Entry[part] = VecVal;
+    }
+    return;
   }
 }
 
@@ -4130,7 +4100,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
       // Vectorize PHINodes.
       widenPHIInstruction(&*it, Entry, UF, VF, PV);
       continue;
-    }// End of PHI.
+    } // End of PHI.
 
     case Instruction::Add:
     case Instruction::FAdd:
@@ -4183,17 +4153,17 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
       // We have to take the 'vectorized' value and pick the first lane.
       // Instcombine will make this a no-op.
       VectorParts &Cond = getVectorValue(it->getOperand(0));
-      VectorParts &Op0  = getVectorValue(it->getOperand(1));
-      VectorParts &Op1  = getVectorValue(it->getOperand(2));
+      VectorParts &Op0 = getVectorValue(it->getOperand(1));
+      VectorParts &Op1 = getVectorValue(it->getOperand(2));
 
-      Value *ScalarCond = (VF == 1) ? Cond[0] :
-        Builder.CreateExtractElement(Cond[0], Builder.getInt32(0));
+      Value *ScalarCond =
+          (VF == 1)
+              ? Cond[0]
+              : Builder.CreateExtractElement(Cond[0], Builder.getInt32(0));
 
       for (unsigned Part = 0; Part < UF; ++Part) {
         Entry[Part] = Builder.CreateSelect(
-          InvariantCond ? ScalarCond : Cond[Part],
-          Op0[Part],
-          Op1[Part]);
+            InvariantCond ? ScalarCond : Cond[Part], Op0[Part], Op1[Part]);
       }
 
       addMetadata(Entry, &*it);
@@ -4226,7 +4196,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
     case Instruction::Store:
     case Instruction::Load:
       vectorizeMemoryInstruction(&*it);
-        break;
+      break;
     case Instruction::ZExt:
     case Instruction::SExt:
     case Instruction::FPToUI:
@@ -4247,8 +4217,8 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
       /// c. other casts depend on pointer size.
       if (CI->getOperand(0) == OldInduction &&
           it->getOpcode() == Instruction::Trunc) {
-        Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction,
-                                               CI->getType());
+        Value *ScalarCast =
+            Builder.CreateCast(CI->getOpcode(), Induction, CI->getType());
         Value *Broadcasted = getBroadcastInstrs(ScalarCast);
         InductionDescriptor II =
             Legal->getInductionVars()->lookup(OldInduction);
@@ -4260,8 +4230,8 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
         break;
       }
       /// Vectorize casts.
-      Type *DestTy = (VF == 1) ? CI->getType() :
-                                 VectorType::get(CI->getType(), VF);
+      Type *DestTy =
+          (VF == 1) ? CI->getType() : VectorType::get(CI->getType(), VF);
 
       VectorParts &A = getVectorValue(it->getOperand(0));
       for (unsigned Part = 0; Part < UF; ++Part)
@@ -4287,9 +4257,8 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
         Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF));
 
       Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
-      if (ID &&
-          (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end ||
-           ID == Intrinsic::lifetime_start)) {
+      if (ID && (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end ||
+                 ID == Intrinsic::lifetime_start)) {
         scalarizeInstruction(&*it);
         break;
       }
@@ -4358,8 +4327,8 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
       // All other instructions are unsupported. Scalarize them.
       scalarizeInstruction(&*it);
       break;
-    }// end of switch.
-  }// end of for_each instr.
+    } // end of switch.
+  }   // end of for_each instr.
 }
 
 void InnerLoopVectorizer::updateAnalysis() {
@@ -4413,7 +4382,8 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
 
   // Collect safe addresses.
   for (Loop::block_iterator BI = TheLoop->block_begin(),
-         BE = TheLoop->block_end(); BI != BE; ++BI) {
+                            BE = TheLoop->block_end();
+       BI != BE; ++BI) {
     BasicBlock *BB = *BI;
 
     if (blockNeedsPredication(BB))
@@ -4430,7 +4400,8 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
   // Collect the blocks that need predication.
   BasicBlock *Header = TheLoop->getHeader();
   for (Loop::block_iterator BI = TheLoop->block_begin(),
-         BE = TheLoop->block_end(); BI != BE; ++BI) {
+                            BE = TheLoop->block_end();
+       BI != BE; ++BI) {
     BasicBlock *BB = *BI;
 
     // We don't support switch statements inside loops.
@@ -4462,9 +4433,8 @@ bool LoopVectorizationLegality::canVectorize() {
   // We must have a loop in canonical form. Loops with indirectbr in them cannot
   // be canonicalized.
   if (!TheLoop->getLoopPreheader()) {
-    emitAnalysis(
-        VectorizationReport() <<
-        "loop control flow is not understood by vectorizer");
+    emitAnalysis(VectorizationReport()
+                 << "loop control flow is not understood by vectorizer");
     return false;
   }
 
@@ -4476,17 +4446,15 @@ bool LoopVectorizationLegality::canVectorize() {
 
   // We must have a single backedge.
   if (TheLoop->getNumBackEdges() != 1) {
-    emitAnalysis(
-        VectorizationReport() <<
-        "loop control flow is not understood by vectorizer");
+    emitAnalysis(VectorizationReport()
+                 << "loop control flow is not understood by vectorizer");
     return false;
   }
 
   // We must have a single exiting block.
   if (!TheLoop->getExitingBlock()) {
-    emitAnalysis(
-        VectorizationReport() <<
-        "loop control flow is not understood by vectorizer");
+    emitAnalysis(VectorizationReport()
+                 << "loop control flow is not understood by vectorizer");
     return false;
   }
 
@@ -4494,15 +4462,14 @@ bool LoopVectorizationLegality::canVectorize() {
   // checked at the end of each iteration. With that we can assume that all
   // instructions in the loop are executed the same number of times.
   if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
-    emitAnalysis(
-        VectorizationReport() <<
-        "loop control flow is not understood by vectorizer");
+    emitAnalysis(VectorizationReport()
+                 << "loop control flow is not understood by vectorizer");
     return false;
   }
 
   // We need to have a loop header.
-  DEBUG(dbgs() << "LV: Found a loop: " <<
-        TheLoop->getHeader()->getName() << '\n');
+  DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
+               << '\n');
 
   // Check if we can if-convert non-single-bb loops.
   unsigned NumBlocks = TheLoop->getNumBlocks();
@@ -4581,7 +4548,7 @@ static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
   return Ty;
 }
 
-static Type* getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
+static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
   Ty0 = convertPointerToIntegerType(DL, Ty0);
   Ty1 = convertPointerToIntegerType(DL, Ty1);
   if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
@@ -4596,7 +4563,7 @@ static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
   // Reduction instructions are allowed to have exit users. All other
   // instructions must not have external users.
   if (!Reductions.count(Inst))
-    //Check that all of the users of the loop are inside the BB.
+    // Check that all of the users of the loop are inside the BB.
     for (User *U : Inst->users()) {
       Instruction *UI = cast<Instruction>(U);
       // This user may be a reduction exit value.
@@ -4619,7 +4586,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
 
   // For each block in the loop.
   for (Loop::block_iterator bb = TheLoop->block_begin(),
-       be = TheLoop->block_end(); bb != be; ++bb) {
+                            be = TheLoop->block_end();
+       bb != be; ++bb) {
 
     // Scan the instructions in the block and look for hazards.
     for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
@@ -4628,8 +4596,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
       if (PHINode *Phi = dyn_cast<PHINode>(it)) {
         Type *PhiTy = Phi->getType();
         // Check that this PHI type is allowed.
-        if (!PhiTy->isIntegerTy() &&
-            !PhiTy->isFloatingPointTy() &&
+        if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
             !PhiTy->isPointerTy()) {
           emitAnalysis(VectorizationReport(&*it)
                        << "loop control flow is not understood by vectorizer");
@@ -4645,9 +4612,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
           // identified reduction value with an outside user.
           if (!hasOutsideLoopUser(TheLoop, &*it, AllowedExit))
             continue;
-          emitAnalysis(VectorizationReport(&*it) <<
-                       "value could not be identified as "
-                       "an induction or reduction variable");
+          emitAnalysis(VectorizationReport(&*it)
+                       << "value could not be identified as "
+                          "an induction or reduction variable");
           return false;
         }
 
@@ -4670,9 +4637,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
 
           // Int inductions are special because we only allow one IV.
           if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
-              ID.getStepValue()->isOne() &&
-              isa<Constant>(ID.getStartValue()) &&
-                cast<Constant>(ID.getStartValue())->isNullValue()) {
+              ID.getStepValue()->isOne() && isa<Constant>(ID.getStartValue()) &&
+              cast<Constant>(ID.getStartValue())->isNullValue()) {
             // Use the phi node with the widest type as induction. Use the last
             // one if there are multiple (no good reason for doing this other
             // than it is expedient). We've checked that it begins at zero and
@@ -4686,9 +4652,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
           // Until we explicitly handle the case of an induction variable with
           // an outside loop user we have to give up vectorizing this loop.
           if (hasOutsideLoopUser(TheLoop, &*it, AllowedExit)) {
-            emitAnalysis(VectorizationReport(&*it) <<
-                         "use of induction value outside of the "
-                         "loop is not handled by vectorizer");
+            emitAnalysis(VectorizationReport(&*it)
+                         << "use of induction value outside of the "
+                            "loop is not handled by vectorizer");
             return false;
           }
 
@@ -4709,19 +4675,20 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
           continue;
         }
 
-        emitAnalysis(VectorizationReport(&*it) <<
-                     "value that could not be identified as "
-                     "reduction is used outside the loop");
-        DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
+        emitAnalysis(VectorizationReport(&*it)
+                     << "value that could not be identified as "
+                        "reduction is used outside the loop");
+        DEBUG(dbgs() << "LV: Found an unidentified PHI." << *Phi << "\n");
         return false;
-      }// end of PHI handling
+      } // end of PHI handling
 
       // We handle calls that:
       //   * Are debug info intrinsics.
       //   * Have a mapping to an IR intrinsic.
       //   * Have a vector version available.
       CallInst *CI = dyn_cast<CallInst>(it);
-      if (CI && !getVectorIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI) &&
+      if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
+          !isa<DbgInfoIntrinsic>(CI) &&
           !(CI->getCalledFunction() && TLI &&
             TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
         emitAnalysis(VectorizationReport(&*it)
@@ -4732,8 +4699,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
 
       // Intrinsics such as powi,cttz and ctlz are legal to vectorize if the
       // second argument is the same (i.e. loop invariant)
-      if (CI &&
-          hasVectorInstrinsicScalarOpd(getVectorIntrinsicIDForCall(CI, TLI), 1)) {
+      if (CI && hasVectorInstrinsicScalarOpd(
+                    getVectorIntrinsicIDForCall(CI, TLI), 1)) {
         auto *SE = PSE.getSE();
         if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(1)), TheLoop)) {
           emitAnalysis(VectorizationReport(&*it)
@@ -4746,7 +4713,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
       // Check that the instruction return type is vectorizable.
       // Also, we can't vectorize extractelement instructions.
       if ((!VectorType::isValidElementType(it->getType()) &&
-           !it->getType()->isVoidTy()) || isa<ExtractElementInst>(it)) {
+           !it->getType()->isVoidTy()) ||
+          isa<ExtractElementInst>(it)) {
         emitAnalysis(VectorizationReport(&*it)
                      << "instruction return type cannot be vectorized");
         DEBUG(dbgs() << "LV: Found unvectorizable type.\n");
@@ -4757,8 +4725,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
       if (StoreInst *ST = dyn_cast<StoreInst>(it)) {
         Type *T = ST->getValueOperand()->getType();
         if (!VectorType::isValidElementType(T)) {
-          emitAnalysis(VectorizationReport(ST) <<
-                       "store instruction cannot be vectorized");
+          emitAnalysis(VectorizationReport(ST)
+                       << "store instruction cannot be vectorized");
           return false;
         }
         if (EnableMemAccessVersioning)
@@ -4768,14 +4736,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
         if (EnableMemAccessVersioning)
           collectStridedAccess(LI);
 
-      // FP instructions can allow unsafe algebra, thus vectorizable by
-      // non-IEEE-754 compliant SIMD units.
-      // This applies to floating-point math operations and calls, not memory
-      // operations, shuffles, or casts, as they don't change precision or
-      // semantics.
+        // FP instructions can allow unsafe algebra, thus vectorizable by
+        // non-IEEE-754 compliant SIMD units.
+        // This applies to floating-point math operations and calls, not memory
+        // operations, shuffles, or casts, as they don't change precision or
+        // semantics.
       } else if (it->getType()->isFloatingPointTy() &&
-                (CI || it->isBinaryOp()) &&
-                !it->hasUnsafeAlgebra()) {
+                 (CI || it->isBinaryOp()) && !it->hasUnsafeAlgebra()) {
         DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
         Hints->setPotentiallyUnsafe();
       }
@@ -4783,13 +4750,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
       // Reduction instructions are allowed to have exit users.
       // All other instructions must not have external users.
       if (hasOutsideLoopUser(TheLoop, &*it, AllowedExit)) {
-        emitAnalysis(VectorizationReport(&*it) <<
-                     "value cannot be used outside the loop");
+        emitAnalysis(VectorizationReport(&*it)
+                     << "value cannot be used outside the loop");
         return false;
       }
 
     } // next instr.
-
   }
 
   if (!Induction) {
@@ -4832,7 +4798,7 @@ void LoopVectorizationLegality::collectStridedAccess(Value *MemAccess) {
 void LoopVectorizationLegality::collectLoopUniforms() {
   // We now know that the loop is vectorizable!
   // Collect variables that will remain uniform after vectorization.
-  std::vector<Value*> Worklist;
+  std::vector<Value *> Worklist;
   BasicBlock *Latch = TheLoop->getLoopLatch();
 
   // Start with the conditional branch and walk up the block.
@@ -4842,9 +4808,9 @@ void LoopVectorizationLegality::collectLoopUniforms() {
   // after vectorization (and subsequent cleanup) and, until revectorization is
   // supported, all dependencies must also be uniform.
   for (Loop::block_iterator B = TheLoop->block_begin(),
-       BE = TheLoop->block_end(); B != BE; ++B)
-    for (BasicBlock::iterator I = (*B)->begin(), IE = (*B)->end();
-         I != IE; ++I)
+                            BE = TheLoop->block_end();
+       B != BE; ++B)
+    for (BasicBlock::iterator I = (*B)->begin(), IE = (*B)->end(); I != IE; ++I)
       if (I->getType()->isPointerTy() && isConsecutivePtr(&*I))
         Worklist.insert(Worklist.end(), I->op_begin(), I->op_end());
 
@@ -4889,7 +4855,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
 }
 
 bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
-  Value *In0 = const_cast<Value*>(V);
+  Value *In0 = const_cast<Value *>(V);
   PHINode *PN = dyn_cast_or_null<PHINode>(In0);
   if (!PN)
     return false;
@@ -4901,12 +4867,12 @@ bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
   return FirstOrderRecurrences.count(Phi);
 }
 
-bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB)  {
+bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
   return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
 }
 
-bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
-                                           SmallPtrSetImpl<Value *> &SafePtrs) {
+bool LoopVectorizationLegality::blockCanBePredicated(
+    BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs) {
   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
 
   for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
@@ -4963,7 +4929,8 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
 
     // The instructions below can trap.
     switch (it->getOpcode()) {
-    default: continue;
+    default:
+      continue;
     case Instruction::UDiv:
     case Instruction::SDiv:
     case Instruction::URem:
@@ -5157,20 +5124,22 @@ void InterleavedAccessInfo::analyzeInterleaving(
 LoopVectorizationCostModel::VectorizationFactor
 LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
   // Width 1 means no vectorize
-  VectorizationFactor Factor = { 1U, 0U };
+  VectorizationFactor Factor = {1U, 0U};
   if (OptForSize && Legal->getRuntimePointerChecking()->Need) {
-    emitAnalysis(VectorizationReport() <<
-                 "runtime pointer checks needed. Enable vectorization of this "
-                 "loop with '#pragma clang loop vectorize(enable)' when "
-                 "compiling with -Os/-Oz");
-    DEBUG(dbgs() <<
-          "LV: Aborting. Runtime ptr check is required with -Os/-Oz.\n");
+    emitAnalysis(
+        VectorizationReport()
+        << "runtime pointer checks needed. Enable vectorization of this "
+           "loop with '#pragma clang loop vectorize(enable)' when "
+           "compiling with -Os/-Oz");
+    DEBUG(dbgs()
+          << "LV: Aborting. Runtime ptr check is required with -Os/-Oz.\n");
     return Factor;
   }
 
   if (!EnableCondStoresVectorization && Legal->getNumPredStores()) {
-    emitAnalysis(VectorizationReport() <<
-                 "store that is conditionally executed prevents vectorization");
+    emitAnalysis(
+        VectorizationReport()
+        << "store that is conditionally executed prevents vectorization");
     DEBUG(dbgs() << "LV: No vectorization. There are conditional stores.\n");
     return Factor;
   }
@@ -5186,14 +5155,14 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
   unsigned MaxSafeDepDist = -1U;
   if (Legal->getMaxSafeDepDistBytes() != -1U)
     MaxSafeDepDist = Legal->getMaxSafeDepDistBytes() * 8;
-  WidestRegister = ((WidestRegister < MaxSafeDepDist) ?
-                    WidestRegister : MaxSafeDepDist);
+  WidestRegister =
+      ((WidestRegister < MaxSafeDepDist) ? WidestRegister : MaxSafeDepDist);
   unsigned MaxVectorSize = WidestRegister / WidestType;
 
   DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType << " / "
                << WidestType << " bits.\n");
-  DEBUG(dbgs() << "LV: The Widest register is: "
-          << WidestRegister << " bits.\n");
+  DEBUG(dbgs() << "LV: The Widest register is: " << WidestRegister
+               << " bits.\n");
 
   if (MaxVectorSize == 0) {
     DEBUG(dbgs() << "LV: The target has no vector registers.\n");
@@ -5201,7 +5170,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
   }
 
   assert(MaxVectorSize <= 64 && "Did not expect to pack so many elements"
-         " into one vector!");
+                                " into one vector!");
 
   unsigned VF = MaxVectorSize;
   if (MaximizeBandwidth && !OptForSize) {
@@ -5229,9 +5198,9 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
   if (OptForSize) {
     // If we are unable to calculate the trip count then don't try to vectorize.
     if (TC < 2) {
-      emitAnalysis
-        (VectorizationReport() <<
-         "unable to calculate the loop count due to complex control flow");
+      emitAnalysis(
+          VectorizationReport()
+          << "unable to calculate the loop count due to complex control flow");
       DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n");
       return Factor;
     }
@@ -5244,11 +5213,11 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
     else {
       // If the trip count that we found modulo the vectorization factor is not
       // zero then we require a tail.
-      emitAnalysis(VectorizationReport() <<
-                   "cannot optimize for size and vectorize at the "
-                   "same time. Enable vectorization of this loop "
-                   "with '#pragma clang loop vectorize(enable)' "
-                   "when compiling with -Os/-Oz");
+      emitAnalysis(VectorizationReport()
+                   << "cannot optimize for size and vectorize at the "
+                      "same time. Enable vectorization of this loop "
+                      "with '#pragma clang loop vectorize(enable)' "
+                      "when compiling with -Os/-Oz");
       DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n");
       return Factor;
     }
@@ -5277,17 +5246,18 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
     Cost = expectedCost(Width).first / (float)Width;
   }
 
-  for (unsigned i=2; i <= VF; i*=2) {
+  for (unsigned i = 2; i <= VF; i *= 2) {
     // Notice that the vector loop needs to be executed less times, so
     // we need to divide the cost of the vector loops by the width of
     // the vector elements.
     VectorizationCostTy C = expectedCost(i);
     float VectorCost = C.first / (float)i;
-    DEBUG(dbgs() << "LV: Vector loop of width " << i << " costs: " <<
-          (int)VectorCost << ".\n");
+    DEBUG(dbgs() << "LV: Vector loop of width " << i
+                 << " costs: " << (int)VectorCost << ".\n");
     if (!C.second && !ForceVectorization) {
-      DEBUG(dbgs() << "LV: Not considering vector loop of width " << i <<
-            " because it will not generate any vector instructions.\n");
+      DEBUG(
+          dbgs() << "LV: Not considering vector loop of width " << i
+                 << " because it will not generate any vector instructions.\n");
       continue;
     }
     if (VectorCost < Cost) {
@@ -5299,7 +5269,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
   DEBUG(if (ForceVectorization && Width > 1 && Cost >= ScalarCost) dbgs()
         << "LV: Vectorization seems to be not beneficial, "
         << "but was forced by a user.\n");
-  DEBUG(dbgs() << "LV: Selecting VF: "<< Width << ".\n");
+  DEBUG(dbgs() << "LV: Selecting VF: " << Width << ".\n");
   Factor.Width = Width;
   Factor.Cost = Width * Cost;
   return Factor;
@@ -5313,7 +5283,8 @@ LoopVectorizationCostModel::getSmallestAndWidestTypes() {
 
   // For each block.
   for (Loop::block_iterator bb = TheLoop->block_begin(),
-       be = TheLoop->block_end(); bb != be; ++bb) {
+                            be = TheLoop->block_end();
+       bb != be; ++bb) {
     BasicBlock *BB = *bb;
 
     // For each instruction in the loop.
@@ -5389,8 +5360,8 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
     return 1;
 
   unsigned TargetNumRegisters = TTI.getNumberOfRegisters(VF > 1);
-  DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters <<
-        " registers\n");
+  DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters
+               << " registers\n");
 
   if (VF == 1) {
     if (ForceTargetNumScalarRegs.getNumOccurrences() > 0)
@@ -5479,8 +5450,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
     // by this point), we can increase the critical path length if the loop
     // we're interleaving is inside another loop. Limit, by default to 2, so the
     // critical path only gets increased by one reduction operation.
-    if (Legal->getReductionVars()->size() &&
-        TheLoop->getLoopDepth() > 1) {
+    if (Legal->getReductionVars()->size() && TheLoop->getLoopDepth() > 1) {
       unsigned F = static_cast<unsigned>(MaxNestedScalarReductionIC);
       SmallIC = std::min(SmallIC, F);
       StoresIC = std::min(StoresIC, F);
@@ -5537,20 +5507,20 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
   // Each 'key' in the map opens a new interval. The values
   // of the map are the index of the 'last seen' usage of the
   // instruction that is the key.
-  typedef DenseMap<Instruction*, unsigned> IntervalMap;
+  typedef DenseMap<Instruction *, unsigned> IntervalMap;
   // Maps instruction to its index.
-  DenseMap<unsigned, Instruction*> IdxToInstr;
+  DenseMap<unsigned, Instruction *> IdxToInstr;
   // Marks the end of each interval.
   IntervalMap EndPoint;
   // Saves the list of instruction indices that are used in the loop.
-  SmallSet<Instruction*, 8> Ends;
+  SmallSet<Instruction *, 8> Ends;
   // Saves the list of values that are used in the loop but are
   // defined outside the loop, such as arguments and constants.
-  SmallPtrSet<Value*, 8> LoopInvariants;
+  SmallPtrSet<Value *, 8> LoopInvariants;
 
   unsigned Index = 0;
-  for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(),
-       be = DFS.endRPO(); bb != be; ++bb) {
+  for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(), be = DFS.endRPO();
+       bb != be; ++bb) {
     RU.NumInstructions += (*bb)->size();
     for (Instruction &I : **bb) {
       IdxToInstr[Index++] = &I;
@@ -5561,7 +5531,8 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
         Instruction *Instr = dyn_cast<Instruction>(U);
 
         // Ignore non-instruction values such as arguments, constants, etc.
-        if (!Instr) continue;
+        if (!Instr)
+          continue;
 
         // If this instruction is outside the loop then record it and continue.
         if (!TheLoop->contains(Instr)) {
@@ -5577,15 +5548,15 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
   }
 
   // Saves the list of intervals that end with the index in 'key'.
-  typedef SmallVector<Instruction*, 2> InstrList;
+  typedef SmallVector<Instruction *, 2> InstrList;
   DenseMap<unsigned, InstrList> TransposeEnds;
 
   // Transpose the EndPoints to a list of values that end at each index.
-  for (IntervalMap::iterator it = EndPoint.begin(), e = EndPoint.end();
-       it != e; ++it)
+  for (IntervalMap::iterator it = EndPoint.begin(), e = EndPoint.end(); it != e;
+       ++it)
     TransposeEnds[it->second].push_back(it->first);
 
-  SmallSet<Instruction*, 8> OpenIntervals;
+  SmallSet<Instruction *, 8> OpenIntervals;
 
   // Get the size of the widest register.
   unsigned MaxSafeDepDist = -1U;
@@ -5611,7 +5582,8 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
   for (unsigned int i = 0; i < Index; ++i) {
     Instruction *I = IdxToInstr[i];
     // Ignore instructions that are never used within the loop.
-    if (!Ends.count(I)) continue;
+    if (!Ends.count(I))
+      continue;
 
     // Skip ignored values.
     if (ValuesToIgnore.count(I))
@@ -5652,7 +5624,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
         Invariant += GetRegUsage(Inst->getType(), VFs[i]);
     }
 
-    DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] <<  '\n');
+    DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n');
     DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsages[i] << '\n');
     DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << '\n');
     DEBUG(dbgs() << "LV(REG): LoopSize: " << RU.NumInstructions << '\n');
@@ -5671,7 +5643,8 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) {
 
   // For each block.
   for (Loop::block_iterator bb = TheLoop->block_begin(),
-       be = TheLoop->block_end(); bb != be; ++bb) {
+                            be = TheLoop->block_end();
+       bb != be; ++bb) {
     VectorizationCostTy BlockCost;
     BasicBlock *BB = *bb;
 
@@ -5693,8 +5666,8 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) {
 
       BlockCost.first += C.first;
       BlockCost.second |= C.second;
-      DEBUG(dbgs() << "LV: Found an estimated cost of " << C.first <<
-            " for VF " << VF << " For instruction: " << *it << '\n');
+      DEBUG(dbgs() << "LV: Found an estimated cost of " << C.first << " for VF "
+                   << VF << " For instruction: " << *it << '\n');
     }
 
     // We assume that if-converted blocks have a 50% chance of being executed.
@@ -5720,7 +5693,7 @@ static bool isGatherOrScatterLegal(Instruction *I, Value *Ptr,
                                    LoopVectorizationLegality *Legal) {
   Type *DataTy = cast<PointerType>(Ptr->getType())->getElementType();
   return (isa<LoadInst>(I) && Legal->isLegalMaskedGather(DataTy)) ||
-    (isa<StoreInst>(I) && Legal->isLegalMaskedScatter(DataTy));
+         (isa<StoreInst>(I) && Legal->isLegalMaskedScatter(DataTy));
 }
 
 /// \brief Check whether the address computation for a non-consecutive memory
@@ -5791,14 +5764,14 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
   Type *VectorTy;
   unsigned C = getInstructionCost(I, VF, VectorTy);
 
-  bool TypeNotScalarized = VF > 1 && !VectorTy->isVoidTy() &&
-                           TTI.getNumberOfParts(VectorTy) < VF;
+  bool TypeNotScalarized =
+      VF > 1 && !VectorTy->isVoidTy() && TTI.getNumberOfParts(VectorTy) < VF;
   return VectorizationCostTy(C, TypeNotScalarized);
 }
 
-unsigned
-LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF,
-                                               Type *&VectorTy) {
+unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
+                                                        unsigned VF,
+                                                        Type *&VectorTy) {
   Type *RetTy = I->getType();
   if (VF > 1 && MinBWs.count(I))
     RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
@@ -5850,9 +5823,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF,
     // Certain instructions can be cheaper to vectorize if they have a constant
     // second vector operand. One example of this are shifts on x86.
     TargetTransformInfo::OperandValueKind Op1VK =
-      TargetTransformInfo::OK_AnyValue;
+        TargetTransformInfo::OK_AnyValue;
     TargetTransformInfo::OperandValueKind Op2VK =
-      TargetTransformInfo::OK_AnyValue;
+        TargetTransformInfo::OK_AnyValue;
     TargetTransformInfo::OperandValueProperties Op1VP =
         TargetTransformInfo::OP_None;
     TargetTransformInfo::OperandValueProperties Op2VP =
@@ -5903,28 +5876,27 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF,
   case Instruction::Load: {
     StoreInst *SI = dyn_cast<StoreInst>(I);
     LoadInst *LI = dyn_cast<LoadInst>(I);
-    Type *ValTy = (SI ? SI->getValueOperand()->getType() :
-                   LI->getType());
+    Type *ValTy = (SI ? SI->getValueOperand()->getType() : LI->getType());
     VectorTy = ToVectorTy(ValTy, VF);
 
     unsigned Alignment = SI ? SI->getAlignment() : LI->getAlignment();
-    unsigned AS = SI ? SI->getPointerAddressSpace() :
-      LI->getPointerAddressSpace();
+    unsigned AS =
+        SI ? SI->getPointerAddressSpace() : LI->getPointerAddressSpace();
     Value *Ptr = SI ? SI->getPointerOperand() : LI->getPointerOperand();
     // We add the cost of address computation here instead of with the gep
     // instruction because only here we know whether the operation is
     // scalarized.
     if (VF == 1)
       return TTI.getAddressComputationCost(VectorTy) +
-        TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
+             TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
 
     if (LI && Legal->isUniform(Ptr)) {
       // Scalar load + broadcast
       unsigned Cost = TTI.getAddressComputationCost(ValTy->getScalarType());
       Cost += TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
                                   Alignment, AS);
-      return Cost + TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast,
-                                       ValTy);
+      return Cost +
+             TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, ValTy);
     }
 
     // For an interleaved access, calculate the total cost of the whole
@@ -5969,8 +5941,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF,
 
     // Scalarized loads/stores.
     int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
-    bool UseGatherOrScatter = (ConsecutiveStride == 0) &&
-      isGatherOrScatterLegal(I, Ptr, Legal);
+    bool UseGatherOrScatter =
+        (ConsecutiveStride == 0) && isGatherOrScatterLegal(I, Ptr, Legal);
 
     bool Reverse = ConsecutiveStride < 0;
     const DataLayout &DL = I->getModule()->getDataLayout();
@@ -5979,7 +5951,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF,
     if ((!ConsecutiveStride && !UseGatherOrScatter) ||
         ScalarAllocatedSize != VectorElementSize) {
       bool IsComplexComputation =
-        isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop);
+          isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop);
       unsigned Cost = 0;
       // The cost of extracting from the value vector and pointer vector.
       Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
@@ -5989,15 +5961,16 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF,
         // In case of STORE, the cost of ExtractElement from the vector.
         // In case of LOAD, the cost of InsertElement into the returned
         // vector.
-        Cost += TTI.getVectorInstrCost(SI ? Instruction::ExtractElement :
-                                            Instruction::InsertElement,
-                                            VectorTy, i);
+        Cost += TTI.getVectorInstrCost(SI ? Instruction::ExtractElement
+                                          : Instruction::InsertElement,
+                                       VectorTy, i);
       }
 
       // The cost of the scalar loads/stores.
       Cost += VF * TTI.getAddressComputationCost(PtrTy, IsComplexComputation);
-      Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
-                                       Alignment, AS);
+      Cost += VF *
+              TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
+                                  Alignment, AS);
       return Cost;
     }
 
@@ -6006,19 +5979,18 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF,
       assert(ConsecutiveStride == 0 &&
              "Gather/Scatter are not used for consecutive stride");
       return Cost +
-        TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
-                                   Legal->isMaskRequired(I), Alignment);
+             TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
+                                        Legal->isMaskRequired(I), Alignment);
     }
     // Wide load/stores.
     if (Legal->isMaskRequired(I))
-      Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment,
-                                        AS);
+      Cost +=
+          TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
     else
       Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
 
     if (Reverse)
-      Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
-                                  VectorTy, 0);
+      Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0);
     return Cost;
   }
   case Instruction::ZExt:
@@ -6051,13 +6023,13 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF,
       Type *MinVecTy = VectorTy;
       if (I->getOpcode() == Instruction::Trunc) {
         SrcVecTy = smallestIntegerVectorType(SrcVecTy, MinVecTy);
-        VectorTy = largestIntegerVectorType(ToVectorTy(I->getType(), VF),
-                                            MinVecTy);
+        VectorTy =
+            largestIntegerVectorType(ToVectorTy(I->getType(), VF), MinVecTy);
       } else if (I->getOpcode() == Instruction::ZExt ||
                  I->getOpcode() == Instruction::SExt) {
         SrcVecTy = largestIntegerVectorType(SrcVecTy, MinVecTy);
-        VectorTy = smallestIntegerVectorType(ToVectorTy(I->getType(), VF),
-                                             MinVecTy);
+        VectorTy =
+            smallestIntegerVectorType(ToVectorTy(I->getType(), VF), MinVecTy);
       }
     }
 
@@ -6078,10 +6050,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF,
     unsigned Cost = 0;
 
     if (!RetTy->isVoidTy() && VF != 1) {
-      unsigned InsCost = TTI.getVectorInstrCost(Instruction::InsertElement,
-                                                VectorTy);
-      unsigned ExtCost = TTI.getVectorInstrCost(Instruction::ExtractElement,
-                                                VectorTy);
+      unsigned InsCost =
+          TTI.getVectorInstrCost(Instruction::InsertElement, VectorTy);
+      unsigned ExtCost =
+          TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy);
 
       // The cost of inserting the results plus extracting each one of the
       // operands.
@@ -6093,7 +6065,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF,
     Cost += VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy);
     return Cost;
   }
-  }// end of switch.
+  } // end of switch.
 }
 
 char LoopVectorize::ID = 0;
@@ -6115,9 +6087,9 @@ INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass)
 INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
 
 namespace llvm {
-  Pass *createLoopVectorizePass(bool NoUnrolling, bool AlwaysVectorize) {
-    return new LoopVectorize(NoUnrolling, AlwaysVectorize);
-  }
+Pass *createLoopVectorizePass(bool NoUnrolling, bool AlwaysVectorize) {
+  return new LoopVectorize(NoUnrolling, AlwaysVectorize);
+}
 }
 
 bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
@@ -6132,7 +6104,6 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
   return false;
 }
 
-
 void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
                                              bool IfPredicateStore) {
   assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
@@ -6174,8 +6145,7 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
   // Does this instruction return a value ?
   bool IsVoidRetTy = Instr->getType()->isVoidTy();
 
-  Value *UndefVec = IsVoidRetTy ? nullptr :
-  UndefValue::get(Instr->getType());
+  Value *UndefVec = IsVoidRetTy ? nullptr : UndefValue::get(Instr->getType());
   // Create a new entry in the WidenMap and initialize it to Undef or Null.
   VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
 
@@ -6202,31 +6172,30 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
     }
 
     Instruction *Cloned = Instr->clone();
-      if (!IsVoidRetTy)
-        Cloned->setName(Instr->getName() + ".cloned");
-      // Replace the operands of the cloned instructions with extracted scalars.
-      for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
-        Value *Op = Params[op][Part];
-        Cloned->setOperand(op, Op);
-      }
+    if (!IsVoidRetTy)
+      Cloned->setName(Instr->getName() + ".cloned");
+    // Replace the operands of the cloned instructions with extracted scalars.
+    for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
+      Value *Op = Params[op][Part];
+      Cloned->setOperand(op, Op);
+    }
 
-      // Place the cloned scalar in the new loop.
-      Builder.Insert(Cloned);
+    // Place the cloned scalar in the new loop.
+    Builder.Insert(Cloned);
 
-      // If we just cloned a new assumption, add it the assumption cache.
-      if (auto *II = dyn_cast<IntrinsicInst>(Cloned))
-        if (II->getIntrinsicID() == Intrinsic::assume)
-          AC->registerAssumption(II);
+    // If we just cloned a new assumption, add it the assumption cache.
+    if (auto *II = dyn_cast<IntrinsicInst>(Cloned))
+      if (II->getIntrinsicID() == Intrinsic::assume)
+        AC->registerAssumption(II);
 
-      // If the original scalar returns a value we need to place it in a vector
-      // so that future users will be able to use it.
-      if (!IsVoidRetTy)
-        VecResults[Part] = Cloned;
+    // If the original scalar returns a value we need to place it in a vector
+    // so that future users will be able to use it.
+    if (!IsVoidRetTy)
+      VecResults[Part] = Cloned;
 
-      // End if-block.
-      if (IfPredicateStore)
-        PredicatedStores.push_back(std::make_pair(cast<StoreInst>(Cloned),
-                                                  Cmp));
+    // End if-block.
+    if (IfPredicateStore)
+      PredicatedStores.push_back(std::make_pair(cast<StoreInst>(Cloned), Cmp));
   }
 }
 
@@ -6237,13 +6206,9 @@ void InnerLoopUnroller::vectorizeMemoryInstruction(Instruction *Instr) {
   return scalarizeInstruction(Instr, IfPredicateStore);
 }
 
-Value *InnerLoopUnroller::reverseVector(Value *Vec) {
-  return Vec;
-}
+Value *InnerLoopUnroller::reverseVector(Value *Vec) { return Vec; }
 
-Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) {
-  return V;
-}
+Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; }
 
 Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, Value *Step) {
   // When unrolling and the VF is 1, we only need to add a simple scalar.
-- 
2.50.1