Revert [SLP] Fix for PR31847: Assertion failed: (isLoopInvariant(Operands[i], L)...

author Jordan Rupprecht <rupprecht@google.com>

Thu, 26 Sep 2019 22:09:17 +0000 (22:09 +0000)

committer Jordan Rupprecht <rupprecht@google.com>

Thu, 26 Sep 2019 22:09:17 +0000 (22:09 +0000)
author Jordan Rupprecht <rupprecht@google.com>
Thu, 26 Sep 2019 22:09:17 +0000 (22:09 +0000)
committer Jordan Rupprecht <rupprecht@google.com>
Thu, 26 Sep 2019 22:09:17 +0000 (22:09 +0000)
diff --git a/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/include/llvm/Transforms/Vectorize/SLPVectorizer.h

index 32ccc8a4638051040f1dd1422676ea0c4c5a18c9..ac6afb761d4d53b5b2ae49087dbf35a38d94f341 100644 (file)
--- a/include/llvm/Transforms/Vectorize/SLPVectorizer.h
+++ b/include/llvm/Transforms/Vectorize/SLPVectorizer.h
@@ -24,6 +24,7 @@
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Analysis/AliasAnalysis.h"
  #include "llvm/IR/PassManager.h"
+#include "llvm/IR/ValueHandle.h"
  
  namespace llvm {
  
@@ -59,8 +60,8 @@ extern cl::opt<bool> RunSLPVectorization;
  struct SLPVectorizerPass : public PassInfoMixin<SLPVectorizerPass> {
    using StoreList = SmallVector<StoreInst *, 8>;
    using StoreListMap = MapVector<Value *, StoreList>;
-  using GEPList = SmallVector<GetElementPtrInst *, 8>;
-  using GEPListMap = MapVector<Value *, GEPList>;
+  using WeakTrackingVHList = SmallVector<WeakTrackingVH, 8>;
+  using WeakTrackingVHListMap = MapVector<Value *, WeakTrackingVHList>;
  
    ScalarEvolution *SE = nullptr;
    TargetTransformInfo *TTI = nullptr;
@@ -130,7 +131,7 @@ private:
  
    /// Tries to vectorize constructs started from CmpInst, InsertValueInst or
    /// InsertElementInst instructions.
-  bool vectorizeSimpleInstructions(SmallVectorImpl<Instruction *> &Instructions,
+  bool vectorizeSimpleInstructions(SmallVectorImpl<WeakVH> &Instructions,
                                     BasicBlock *BB, slpvectorizer::BoUpSLP &R);
  
    /// Scan the basic block and look for patterns that are likely to start
@@ -146,7 +147,7 @@ private:
    StoreListMap Stores;
  
    /// The getelementptr instructions in a basic block organized by base pointer.
-  GEPListMap GEPs;
+  WeakTrackingVHListMap GEPs;
  };
  
  } // end namespace llvm
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp

index 71b0abbf73933090d15f02e61b196f8ca0341819..12c114f83d4bee41d58943fc44838140669c9580 100644 (file)
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1121,14 +1121,6 @@ public:
  #endif
    };
  
-  /// Checks if the instruction is marked for deletion.
-  bool isDeleted(Instruction *I) const { return DeletedInstructions.count(I); }
-
-  /// Marks values for later deletion.
-  void eraseInstructions(ArrayRef<Value *> AV);
-
-  ~BoUpSLP();
-
  private:
    /// Checks if all users of \p I are the part of the vectorization tree.
    bool areAllUsersVectorized(Instruction *I) const;
@@ -1499,12 +1491,14 @@ private:
    /// AliasCache, which can happen if a new instruction is allocated at the
    /// same address as a previously deleted instruction.
    void eraseInstruction(Instruction *I) {
-    DeletedInstructions.insert(I);
+    I->removeFromParent();
+    I->dropAllReferences();
+    DeletedInstructions.emplace_back(I);
    }
  
    /// Temporary store for deleted instructions. Instructions will be deleted
    /// eventually when the BoUpSLP is destructed.
-  SmallPtrSet<Instruction *, 8> DeletedInstructions;
+  SmallVector<unique_value, 8> DeletedInstructions;
  
    /// A list of values that need to extracted out of the tree.
    /// This list holds pairs of (Internal Scalar : External User). External User
@@ -2061,22 +2055,6 @@ template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits {
  
  } // end namespace llvm
  
-BoUpSLP::~BoUpSLP() {
-  for (auto *I : DeletedInstructions)
-    I->dropAllReferences();
-  for (auto *I : DeletedInstructions) {
-    assert(I->use_empty() && "trying to erase instruction with users.");
-    I->eraseFromParent();
-  }
-}
-
-void BoUpSLP::eraseInstructions(ArrayRef<Value *> AV) {
-  for (auto *V : AV) {
-    if (auto *I = dyn_cast<Instruction>(V))
-      eraseInstruction(I);
-  };
-}
-
  void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
                          ArrayRef<Value *> UserIgnoreLst) {
    ExtraValueToDebugLocsMap ExternallyUsedValues;
@@ -3563,7 +3541,7 @@ Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
    // Generate the 'InsertElement' instruction.
    for (unsigned i = 0; i < Ty->getNumElements(); ++i) {
      Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
-    if (auto *Insrt = dyn_cast<InsertElementInst>(Vec)) {
+    if (Instruction *Insrt = dyn_cast<Instruction>(Vec)) {
        GatherSeq.insert(Insrt);
        CSEBlocks.insert(Insrt->getParent());
  
@@ -4312,18 +4290,20 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
      for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
        Value *Scalar = Entry->Scalars[Lane];
  
-#ifndef NDEBUG
        Type *Ty = Scalar->getType();
        if (!Ty->isVoidTy()) {
+#ifndef NDEBUG
          for (User *U : Scalar->users()) {
            LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
  
-          // It is legal to delete users in the ignorelist.
+          // It is legal to replace users in the ignorelist by undef.
            assert((getTreeEntry(U) || is_contained(UserIgnoreList, U)) &&
-                 "Deleting out-of-tree value");
+                 "Replacing out-of-tree value with undef");
          }
-      }
  #endif
+        Value *Undef = UndefValue::get(Ty);
+        Scalar->replaceAllUsesWith(Undef);
+      }
        LLVM_DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n");
        eraseInstruction(cast<Instruction>(Scalar));
      }
@@ -4339,7 +4319,7 @@ void BoUpSLP::optimizeGatherSequence() {
                      << " gather sequences instructions.\n");
    // LICM InsertElementInst sequences.
    for (Instruction *I : GatherSeq) {
-    if (isDeleted(I))
+    if (!isa<InsertElementInst>(I) && !isa<ShuffleVectorInst>(I))
        continue;
  
      // Check if this block is inside a loop.
@@ -4393,8 +4373,6 @@ void BoUpSLP::optimizeGatherSequence() {
      // For all instructions in blocks containing gather sequences:
      for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) {
        Instruction *In = &*it++;
-      if (isDeleted(In))
-        continue;
        if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In))
          continue;
  
@@ -5277,6 +5255,19 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
    return Changed;
  }
  
+/// Check that the Values in the slice in VL array are still existent in
+/// the WeakTrackingVH array.
+/// Vectorization of part of the VL array may cause later values in the VL array
+/// to become invalid. We track when this has happened in the WeakTrackingVH
+/// array.
+static bool hasValueBeenRAUWed(ArrayRef<Value *> VL,
+                               ArrayRef<WeakTrackingVH> VH, unsigned SliceBegin,
+                               unsigned SliceSize) {
+  VL = VL.slice(SliceBegin, SliceSize);
+  VH = VH.slice(SliceBegin, SliceSize);
+  return !std::equal(VL.begin(), VL.end(), VH.begin());
+}
+
  bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
                                              unsigned VecRegSize) {
    const unsigned ChainLen = Chain.size();
@@ -5288,20 +5279,20 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
    if (!isPowerOf2_32(Sz) || VF < 2)
      return false;
  
+  // Keep track of values that were deleted by vectorizing in the loop below.
+  const SmallVector<WeakTrackingVH, 8> TrackValues(Chain.begin(), Chain.end());
+
    bool Changed = false;
    // Look for profitable vectorizable trees at all offsets, starting at zero.
    for (unsigned i = 0, e = ChainLen; i + VF <= e; ++i) {
  
-    ArrayRef<Value *> Operands = Chain.slice(i, VF);
      // Check that a previous iteration of this loop did not delete the Value.
-    if (llvm::any_of(Operands, [&R](Value *V) {
-          auto *I = dyn_cast<Instruction>(V);
-          return I && R.isDeleted(I);
-        }))
+    if (hasValueBeenRAUWed(Chain, TrackValues, i, VF))
        continue;
  
      LLVM_DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i
                        << "\n");
+    ArrayRef<Value *> Operands = Chain.slice(i, VF);
  
      R.buildTree(Operands);
      if (R.isTreeTinyAndNotFullyVectorizable())
@@ -5493,6 +5484,9 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
    bool CandidateFound = false;
    int MinCost = SLPCostThreshold;
  
+  // Keep track of values that were deleted by vectorizing in the loop below.
+  SmallVector<WeakTrackingVH, 8> TrackValues(VL.begin(), VL.end());
+
    unsigned NextInst = 0, MaxInst = VL.size();
    for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; VF /= 2) {
      // No actual vectorization should happen, if number of parts is the same as
@@ -5512,16 +5506,13 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
        if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2)
          break;
  
-      ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
        // Check that a previous iteration of this loop did not delete the Value.
-      if (llvm::any_of(Ops, [&R](Value *V) {
-            auto *I = dyn_cast<Instruction>(V);
-            return I && R.isDeleted(I);
-          }))
+      if (hasValueBeenRAUWed(VL, TrackValues, I, OpsWidth))
          continue;
  
        LLVM_DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
                          << "\n");
+      ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
  
        R.buildTree(Ops);
        Optional<ArrayRef<unsigned>> Order = R.bestOrder();
@@ -5742,23 +5733,23 @@ class HorizontalReduction {
        case RK_Min:
          Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSLT(LHS, RHS)
                                            : Builder.CreateFCmpOLT(LHS, RHS);
-        return Builder.CreateSelect(Cmp, LHS, RHS, Name);
+        break;
        case RK_Max:
          Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSGT(LHS, RHS)
                                            : Builder.CreateFCmpOGT(LHS, RHS);
-        return Builder.CreateSelect(Cmp, LHS, RHS, Name);
+        break;
        case RK_UMin:
          assert(Opcode == Instruction::ICmp && "Expected integer types.");
          Cmp = Builder.CreateICmpULT(LHS, RHS);
-        return Builder.CreateSelect(Cmp, LHS, RHS, Name);
+        break;
        case RK_UMax:
          assert(Opcode == Instruction::ICmp && "Expected integer types.");
          Cmp = Builder.CreateICmpUGT(LHS, RHS);
-        return Builder.CreateSelect(Cmp, LHS, RHS, Name);
-      case RK_None:
          break;
+      case RK_None:
+        llvm_unreachable("Unknown reduction operation.");
        }
-      llvm_unreachable("Unknown reduction operation.");
+      return Builder.CreateSelect(Cmp, LHS, RHS, Name);
      }
  
    public:
@@ -6438,9 +6429,6 @@ public:
        }
        // Update users.
        ReductionRoot->replaceAllUsesWith(VectorizedTree);
-      // Mark all scalar reduction ops for deletion, they are replaced by the
-      // vector reductions.
-      V.eraseInstructions(IgnoreList);
      }
      return VectorizedTree != nullptr;
    }
@@ -6695,13 +6683,18 @@ static bool tryToVectorizeHorReductionOrInstOperands(
    // horizontal reduction.
    // Interrupt the process if the Root instruction itself was vectorized or all
    // sub-trees not higher that RecursionMaxDepth were analyzed/vectorized.
-  SmallVector<std::pair<Instruction *, unsigned>, 8> Stack(1, {Root, 0});
+  SmallVector<std::pair<WeakTrackingVH, unsigned>, 8> Stack(1, {Root, 0});
    SmallPtrSet<Value *, 8> VisitedInstrs;
    bool Res = false;
    while (!Stack.empty()) {
-    Instruction *Inst;
+    Value *V;
      unsigned Level;
-    std::tie(Inst, Level) = Stack.pop_back_val();
+    std::tie(V, Level) = Stack.pop_back_val();
+    if (!V)
+      continue;
+    auto *Inst = dyn_cast<Instruction>(V);
+    if (!Inst)
+      continue;
      auto *BI = dyn_cast<BinaryOperator>(Inst);
      auto *SI = dyn_cast<SelectInst>(Inst);
      if (BI || SI) {
@@ -6742,8 +6735,8 @@ static bool tryToVectorizeHorReductionOrInstOperands(
        for (auto *Op : Inst->operand_values())
          if (VisitedInstrs.insert(Op).second)
            if (auto *I = dyn_cast<Instruction>(Op))
-            if (!isa<PHINode>(I) && !R.isDeleted(I) && I->getParent() == BB)
-              Stack.emplace_back(I, Level);
+            if (!isa<PHINode>(I) && I->getParent() == BB)
+              Stack.emplace_back(Op, Level);
    }
    return Res;
  }
@@ -6812,10 +6805,11 @@ bool SLPVectorizerPass::vectorizeCmpInst(CmpInst *CI, BasicBlock *BB,
  }
  
  bool SLPVectorizerPass::vectorizeSimpleInstructions(
-    SmallVectorImpl<Instruction *> &Instructions, BasicBlock *BB, BoUpSLP &R) {
+    SmallVectorImpl<WeakVH> &Instructions, BasicBlock *BB, BoUpSLP &R) {
    bool OpsChanged = false;
-  for (auto *I : reverse(Instructions)) {
-    if (R.isDeleted(I))
+  for (auto &VH : reverse(Instructions)) {
+    auto *I = dyn_cast_or_null<Instruction>(VH);
+    if (!I)
        continue;
      if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I))
        OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R);
@@ -6844,7 +6838,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
        if (!P)
          break;
  
-      if (!VisitedInstrs.count(P) && !R.isDeleted(P))
+      if (!VisitedInstrs.count(P))
          Incoming.push_back(P);
      }
  
@@ -6888,12 +6882,9 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
  
    VisitedInstrs.clear();
  
-  SmallVector<Instruction *, 8> PostProcessInstructions;
+  SmallVector<WeakVH, 8> PostProcessInstructions;
    SmallDenseSet<Instruction *, 4> KeyNodes;
    for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
-    // Skip instructions marked for the deletion.
-    if (R.isDeleted(&*it))
-      continue;
      // We may go through BB multiple times so skip the one we have checked.
      if (!VisitedInstrs.insert(&*it).second) {
        if (it->use_empty() && KeyNodes.count(&*it) > 0 &&
@@ -6986,10 +6977,10 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
        SetVector<Value *> Candidates(GEPList.begin(), GEPList.end());
  
        // Some of the candidates may have already been vectorized after we
-      // initially collected them. If so, they are marked as deleted, so remove
-      // them from the set of candidates.
-      Candidates.remove_if(
-          [&R](Value *I) { return R.isDeleted(cast<Instruction>(I)); });
+      // initially collected them. If so, the WeakTrackingVHs will have
+      // nullified the
+      // values, so remove them from the set of candidates.
+      Candidates.remove(nullptr);
  
        // Remove from the set of candidates all pairs of getelementptrs with
        // constant differences. Such getelementptrs are likely not good
@@ -6997,18 +6988,18 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
        // computed from the other. We also ensure all candidate getelementptr
        // indices are unique.
        for (int I = 0, E = GEPList.size(); I < E && Candidates.size() > 1; ++I) {
-        auto *GEPI = GEPList[I];
+        auto *GEPI = cast<GetElementPtrInst>(GEPList[I]);
          if (!Candidates.count(GEPI))
            continue;
          auto *SCEVI = SE->getSCEV(GEPList[I]);
          for (int J = I + 1; J < E && Candidates.size() > 1; ++J) {
-          auto *GEPJ = GEPList[J];
+          auto *GEPJ = cast<GetElementPtrInst>(GEPList[J]);
            auto *SCEVJ = SE->getSCEV(GEPList[J]);
            if (isa<SCEVConstant>(SE->getMinusSCEV(SCEVI, SCEVJ))) {
-            Candidates.remove(GEPI);
-            Candidates.remove(GEPJ);
+            Candidates.remove(GEPList[I]);
+            Candidates.remove(GEPList[J]);
            } else if (GEPI->idx_begin()->get() == GEPJ->idx_begin()->get()) {
-            Candidates.remove(GEPJ);
+            Candidates.remove(GEPList[J]);
            }
          }
        }
diff --git a/test/Transforms/SLPVectorizer/AArch64/gather-root.ll b/test/Transforms/SLPVectorizer/AArch64/gather-root.ll

index 5f8cbd5c8d0101c6f1fea113c1a2777ff2f28620..5e6db8b483607097d05ee636a41bde85acf638fc 100644 (file)
--- a/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
+++ b/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
@@ -17,8 +17,16 @@ define void @PR28330(i32 %n) {
  ; DEFAULT:       for.body:
  ; DEFAULT-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
  ; DEFAULT-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
+; DEFAULT-NEXT:    [[P20:%.*]] = add i32 [[P17]], undef
+; DEFAULT-NEXT:    [[P22:%.*]] = add i32 [[P20]], undef
+; DEFAULT-NEXT:    [[P24:%.*]] = add i32 [[P22]], undef
+; DEFAULT-NEXT:    [[P26:%.*]] = add i32 [[P24]], undef
+; DEFAULT-NEXT:    [[P28:%.*]] = add i32 [[P26]], undef
+; DEFAULT-NEXT:    [[P30:%.*]] = add i32 [[P28]], undef
+; DEFAULT-NEXT:    [[P32:%.*]] = add i32 [[P30]], undef
  ; DEFAULT-NEXT:    [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
  ; DEFAULT-NEXT:    [[OP_EXTRA]] = add i32 [[TMP3]], [[P17]]
+; DEFAULT-NEXT:    [[P34:%.*]] = add i32 [[P32]], undef
  ; DEFAULT-NEXT:    br label [[FOR_BODY]]
  ;
  ; GATHER-LABEL: @PR28330(
@@ -28,30 +36,37 @@ define void @PR28330(i32 %n) {
  ; GATHER-NEXT:    br label [[FOR_BODY:%.*]]
  ; GATHER:       for.body:
  ; GATHER-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; GATHER-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
-; GATHER-NEXT:    [[TMP3:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
-; GATHER-NEXT:    [[TMP4:%.*]] = insertelement <8 x i1> undef, i1 [[TMP3]], i32 0
-; GATHER-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
-; GATHER-NEXT:    [[TMP6:%.*]] = insertelement <8 x i1> [[TMP4]], i1 [[TMP5]], i32 1
-; GATHER-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
-; GATHER-NEXT:    [[TMP8:%.*]] = insertelement <8 x i1> [[TMP6]], i1 [[TMP7]], i32 2
-; GATHER-NEXT:    [[TMP9:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
-; GATHER-NEXT:    [[TMP10:%.*]] = insertelement <8 x i1> [[TMP8]], i1 [[TMP9]], i32 3
-; GATHER-NEXT:    [[TMP11:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
-; GATHER-NEXT:    [[TMP12:%.*]] = insertelement <8 x i1> [[TMP10]], i1 [[TMP11]], i32 4
-; GATHER-NEXT:    [[TMP13:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
-; GATHER-NEXT:    [[TMP14:%.*]] = insertelement <8 x i1> [[TMP12]], i1 [[TMP13]], i32 5
-; GATHER-NEXT:    [[TMP15:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
-; GATHER-NEXT:    [[TMP16:%.*]] = insertelement <8 x i1> [[TMP14]], i1 [[TMP15]], i32 6
-; GATHER-NEXT:    [[TMP17:%.*]] = insertelement <8 x i1> [[TMP16]], i1 [[TMP2]], i32 7
+; GATHER-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
+; GATHER-NEXT:    [[TMP3:%.*]] = insertelement <8 x i1> undef, i1 [[TMP2]], i32 0
+; GATHER-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
+; GATHER-NEXT:    [[TMP5:%.*]] = insertelement <8 x i1> [[TMP3]], i1 [[TMP4]], i32 1
+; GATHER-NEXT:    [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
+; GATHER-NEXT:    [[TMP7:%.*]] = insertelement <8 x i1> [[TMP5]], i1 [[TMP6]], i32 2
+; GATHER-NEXT:    [[TMP8:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
+; GATHER-NEXT:    [[TMP9:%.*]] = insertelement <8 x i1> [[TMP7]], i1 [[TMP8]], i32 3
+; GATHER-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
+; GATHER-NEXT:    [[TMP11:%.*]] = insertelement <8 x i1> [[TMP9]], i1 [[TMP10]], i32 4
+; GATHER-NEXT:    [[TMP12:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
+; GATHER-NEXT:    [[TMP13:%.*]] = insertelement <8 x i1> [[TMP11]], i1 [[TMP12]], i32 5
+; GATHER-NEXT:    [[TMP14:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
+; GATHER-NEXT:    [[TMP15:%.*]] = insertelement <8 x i1> [[TMP13]], i1 [[TMP14]], i32 6
+; GATHER-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
+; GATHER-NEXT:    [[TMP17:%.*]] = insertelement <8 x i1> [[TMP15]], i1 [[TMP16]], i32 7
  ; GATHER-NEXT:    [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
  ; GATHER-NEXT:    [[TMP19:%.*]] = extractelement <8 x i32> [[TMP18]], i32 0
+; GATHER-NEXT:    [[P20:%.*]] = add i32 [[P17]], [[TMP19]]
  ; GATHER-NEXT:    [[TMP20:%.*]] = extractelement <8 x i32> [[TMP18]], i32 1
+; GATHER-NEXT:    [[P22:%.*]] = add i32 [[P20]], [[TMP20]]
  ; GATHER-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[TMP18]], i32 2
+; GATHER-NEXT:    [[P24:%.*]] = add i32 [[P22]], [[TMP21]]
  ; GATHER-NEXT:    [[TMP22:%.*]] = extractelement <8 x i32> [[TMP18]], i32 3
+; GATHER-NEXT:    [[P26:%.*]] = add i32 [[P24]], [[TMP22]]
  ; GATHER-NEXT:    [[TMP23:%.*]] = extractelement <8 x i32> [[TMP18]], i32 4
+; GATHER-NEXT:    [[P28:%.*]] = add i32 [[P26]], [[TMP23]]
  ; GATHER-NEXT:    [[TMP24:%.*]] = extractelement <8 x i32> [[TMP18]], i32 5
+; GATHER-NEXT:    [[P30:%.*]] = add i32 [[P28]], [[TMP24]]
  ; GATHER-NEXT:    [[TMP25:%.*]] = extractelement <8 x i32> [[TMP18]], i32 6
+; GATHER-NEXT:    [[P32:%.*]] = add i32 [[P30]], [[TMP25]]
  ; GATHER-NEXT:    [[TMP26:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0
  ; GATHER-NEXT:    [[TMP27:%.*]] = insertelement <8 x i32> [[TMP26]], i32 [[TMP20]], i32 1
  ; GATHER-NEXT:    [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32 [[TMP21]], i32 2
@@ -63,6 +78,7 @@ define void @PR28330(i32 %n) {
  ; GATHER-NEXT:    [[TMP34:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP33]], i32 7
  ; GATHER-NEXT:    [[TMP35:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP34]])
  ; GATHER-NEXT:    [[OP_EXTRA]] = add i32 [[TMP35]], [[P17]]
+; GATHER-NEXT:    [[P34:%.*]] = add i32 [[P32]], [[TMP33]]
  ; GATHER-NEXT:    br label [[FOR_BODY]]
  ;
  ; MAX-COST-LABEL: @PR28330(
@@ -153,8 +169,16 @@ define void @PR32038(i32 %n) {
  ; DEFAULT:       for.body:
  ; DEFAULT-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
  ; DEFAULT-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
+; DEFAULT-NEXT:    [[P20:%.*]] = add i32 -5, undef
+; DEFAULT-NEXT:    [[P22:%.*]] = add i32 [[P20]], undef
+; DEFAULT-NEXT:    [[P24:%.*]] = add i32 [[P22]], undef
+; DEFAULT-NEXT:    [[P26:%.*]] = add i32 [[P24]], undef
+; DEFAULT-NEXT:    [[P28:%.*]] = add i32 [[P26]], undef
+; DEFAULT-NEXT:    [[P30:%.*]] = add i32 [[P28]], undef
+; DEFAULT-NEXT:    [[P32:%.*]] = add i32 [[P30]], undef
  ; DEFAULT-NEXT:    [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
  ; DEFAULT-NEXT:    [[OP_EXTRA]] = add i32 [[TMP3]], -5
+; DEFAULT-NEXT:    [[P34:%.*]] = add i32 [[P32]], undef
  ; DEFAULT-NEXT:    br label [[FOR_BODY]]
  ;
  ; GATHER-LABEL: @PR32038(
@@ -164,30 +188,37 @@ define void @PR32038(i32 %n) {
  ; GATHER-NEXT:    br label [[FOR_BODY:%.*]]
  ; GATHER:       for.body:
  ; GATHER-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; GATHER-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
-; GATHER-NEXT:    [[TMP3:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
-; GATHER-NEXT:    [[TMP4:%.*]] = insertelement <8 x i1> undef, i1 [[TMP3]], i32 0
-; GATHER-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
-; GATHER-NEXT:    [[TMP6:%.*]] = insertelement <8 x i1> [[TMP4]], i1 [[TMP5]], i32 1
-; GATHER-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
-; GATHER-NEXT:    [[TMP8:%.*]] = insertelement <8 x i1> [[TMP6]], i1 [[TMP7]], i32 2
-; GATHER-NEXT:    [[TMP9:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
-; GATHER-NEXT:    [[TMP10:%.*]] = insertelement <8 x i1> [[TMP8]], i1 [[TMP9]], i32 3
-; GATHER-NEXT:    [[TMP11:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
-; GATHER-NEXT:    [[TMP12:%.*]] = insertelement <8 x i1> [[TMP10]], i1 [[TMP11]], i32 4
-; GATHER-NEXT:    [[TMP13:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
-; GATHER-NEXT:    [[TMP14:%.*]] = insertelement <8 x i1> [[TMP12]], i1 [[TMP13]], i32 5
-; GATHER-NEXT:    [[TMP15:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
-; GATHER-NEXT:    [[TMP16:%.*]] = insertelement <8 x i1> [[TMP14]], i1 [[TMP15]], i32 6
-; GATHER-NEXT:    [[TMP17:%.*]] = insertelement <8 x i1> [[TMP16]], i1 [[TMP2]], i32 7
+; GATHER-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
+; GATHER-NEXT:    [[TMP3:%.*]] = insertelement <8 x i1> undef, i1 [[TMP2]], i32 0
+; GATHER-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
+; GATHER-NEXT:    [[TMP5:%.*]] = insertelement <8 x i1> [[TMP3]], i1 [[TMP4]], i32 1
+; GATHER-NEXT:    [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
+; GATHER-NEXT:    [[TMP7:%.*]] = insertelement <8 x i1> [[TMP5]], i1 [[TMP6]], i32 2
+; GATHER-NEXT:    [[TMP8:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
+; GATHER-NEXT:    [[TMP9:%.*]] = insertelement <8 x i1> [[TMP7]], i1 [[TMP8]], i32 3
+; GATHER-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
+; GATHER-NEXT:    [[TMP11:%.*]] = insertelement <8 x i1> [[TMP9]], i1 [[TMP10]], i32 4
+; GATHER-NEXT:    [[TMP12:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
+; GATHER-NEXT:    [[TMP13:%.*]] = insertelement <8 x i1> [[TMP11]], i1 [[TMP12]], i32 5
+; GATHER-NEXT:    [[TMP14:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
+; GATHER-NEXT:    [[TMP15:%.*]] = insertelement <8 x i1> [[TMP13]], i1 [[TMP14]], i32 6
+; GATHER-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
+; GATHER-NEXT:    [[TMP17:%.*]] = insertelement <8 x i1> [[TMP15]], i1 [[TMP16]], i32 7
  ; GATHER-NEXT:    [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
  ; GATHER-NEXT:    [[TMP19:%.*]] = extractelement <8 x i32> [[TMP18]], i32 0
+; GATHER-NEXT:    [[P20:%.*]] = add i32 -5, [[TMP19]]
  ; GATHER-NEXT:    [[TMP20:%.*]] = extractelement <8 x i32> [[TMP18]], i32 1
+; GATHER-NEXT:    [[P22:%.*]] = add i32 [[P20]], [[TMP20]]
  ; GATHER-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[TMP18]], i32 2
+; GATHER-NEXT:    [[P24:%.*]] = add i32 [[P22]], [[TMP21]]
  ; GATHER-NEXT:    [[TMP22:%.*]] = extractelement <8 x i32> [[TMP18]], i32 3
+; GATHER-NEXT:    [[P26:%.*]] = add i32 [[P24]], [[TMP22]]
  ; GATHER-NEXT:    [[TMP23:%.*]] = extractelement <8 x i32> [[TMP18]], i32 4
+; GATHER-NEXT:    [[P28:%.*]] = add i32 [[P26]], [[TMP23]]
  ; GATHER-NEXT:    [[TMP24:%.*]] = extractelement <8 x i32> [[TMP18]], i32 5
+; GATHER-NEXT:    [[P30:%.*]] = add i32 [[P28]], [[TMP24]]
  ; GATHER-NEXT:    [[TMP25:%.*]] = extractelement <8 x i32> [[TMP18]], i32 6
+; GATHER-NEXT:    [[P32:%.*]] = add i32 [[P30]], [[TMP25]]
  ; GATHER-NEXT:    [[TMP26:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0
  ; GATHER-NEXT:    [[TMP27:%.*]] = insertelement <8 x i32> [[TMP26]], i32 [[TMP20]], i32 1
  ; GATHER-NEXT:    [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32 [[TMP21]], i32 2
@@ -199,6 +230,7 @@ define void @PR32038(i32 %n) {
  ; GATHER-NEXT:    [[TMP34:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP33]], i32 7
  ; GATHER-NEXT:    [[TMP35:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP34]])
  ; GATHER-NEXT:    [[OP_EXTRA]] = add i32 [[TMP35]], -5
+; GATHER-NEXT:    [[P34:%.*]] = add i32 [[P32]], [[TMP33]]
  ; GATHER-NEXT:    br label [[FOR_BODY]]
  ;
  ; MAX-COST-LABEL: @PR32038(
@@ -227,12 +259,18 @@ define void @PR32038(i32 %n) {
  ; MAX-COST-NEXT:    [[TMP6:%.*]] = insertelement <4 x i1> [[TMP5]], i1 [[P5]], i32 2
  ; MAX-COST-NEXT:    [[TMP7:%.*]] = insertelement <4 x i1> [[TMP6]], i1 [[P7]], i32 3
  ; MAX-COST-NEXT:    [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> <i32 -720, i32 -720, i32 -720, i32 -720>, <4 x i32> <i32 -80, i32 -80, i32 -80, i32 -80>
+; MAX-COST-NEXT:    [[P20:%.*]] = add i32 -5, undef
+; MAX-COST-NEXT:    [[P22:%.*]] = add i32 [[P20]], undef
+; MAX-COST-NEXT:    [[P24:%.*]] = add i32 [[P22]], undef
+; MAX-COST-NEXT:    [[P26:%.*]] = add i32 [[P24]], undef
  ; MAX-COST-NEXT:    [[P27:%.*]] = select i1 [[P9]], i32 -720, i32 -80
+; MAX-COST-NEXT:    [[P28:%.*]] = add i32 [[P26]], [[P27]]
  ; MAX-COST-NEXT:    [[P29:%.*]] = select i1 [[P11]], i32 -720, i32 -80
  ; MAX-COST-NEXT:    [[TMP9:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP8]])
  ; MAX-COST-NEXT:    [[TMP10:%.*]] = add i32 [[TMP9]], [[P27]]
  ; MAX-COST-NEXT:    [[TMP11:%.*]] = add i32 [[TMP10]], [[P29]]
  ; MAX-COST-NEXT:    [[OP_EXTRA:%.*]] = add i32 [[TMP11]], -5
+; MAX-COST-NEXT:    [[P30:%.*]] = add i32 [[P28]], [[P29]]
  ; MAX-COST-NEXT:    [[P31:%.*]] = select i1 [[P13]], i32 -720, i32 -80
  ; MAX-COST-NEXT:    [[P32:%.*]] = add i32 [[OP_EXTRA]], [[P31]]
  ; MAX-COST-NEXT:    [[P33:%.*]] = select i1 [[P15]], i32 -720, i32 -80
diff --git a/test/Transforms/SLPVectorizer/AArch64/horizontal.ll b/test/Transforms/SLPVectorizer/AArch64/horizontal.ll

index b3ce90dabd843dbecb7cf57c4f792a0f2418dd27..7e4b95d7234994bc0e5f4eba068f31b7618af5b8 100644 (file)
--- a/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
+++ b/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
@@ -46,8 +46,12 @@ define i32 @test_select(i32* noalias nocapture readonly %blk1, i32* noalias noca
  ; CHECK-NEXT:    [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]], zeroinitializer
  ; CHECK-NEXT:    [[TMP6:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP4]]
  ; CHECK-NEXT:    [[TMP7:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP4]]
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 undef, [[S_026]]
+; CHECK-NEXT:    [[ADD11:%.*]] = add nsw i32 [[ADD]], undef
+; CHECK-NEXT:    [[ADD19:%.*]] = add nsw i32 [[ADD11]], undef
  ; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
  ; CHECK-NEXT:    [[OP_EXTRA]] = add nsw i32 [[TMP8]], [[S_026]]
+; CHECK-NEXT:    [[ADD27:%.*]] = add nsw i32 [[ADD19]], undef
  ; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i32, i32* [[P1_023]], i64 [[IDX_EXT]]
  ; CHECK-NEXT:    [[ADD_PTR29]] = getelementptr inbounds i32, i32* [[P2_024]], i64 [[IDX_EXT]]
  ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[J_025]], 1
@@ -169,8 +173,12 @@ define i32 @reduction_with_br(i32* noalias nocapture readonly %blk1, i32* noalia
  ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[P2_018]] to <4 x i32>*
  ; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
  ; CHECK-NEXT:    [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP3]], [[TMP1]]
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 undef, [[S_020]]
+; CHECK-NEXT:    [[ADD5:%.*]] = add nsw i32 [[ADD]], undef
+; CHECK-NEXT:    [[ADD9:%.*]] = add nsw i32 [[ADD5]], undef
  ; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
  ; CHECK-NEXT:    [[OP_EXTRA]] = add nsw i32 [[TMP5]], [[S_020]]
+; CHECK-NEXT:    [[ADD13:%.*]] = add nsw i32 [[ADD9]], undef
  ; CHECK-NEXT:    [[CMP14:%.*]] = icmp slt i32 [[OP_EXTRA]], [[LIM:%.*]]
  ; CHECK-NEXT:    br i1 [[CMP14]], label [[IF_END]], label [[FOR_END_LOOPEXIT:%.*]]
  ; CHECK:       if.end:
@@ -285,8 +293,16 @@ define i32 @test_unrolled_select(i8* noalias nocapture readonly %blk1, i8* noali
  ; CHECK-NEXT:    [[TMP7:%.*]] = icmp slt <8 x i32> [[TMP6]], zeroinitializer
  ; CHECK-NEXT:    [[TMP8:%.*]] = sub nsw <8 x i32> zeroinitializer, [[TMP6]]
  ; CHECK-NEXT:    [[TMP9:%.*]] = select <8 x i1> [[TMP7]], <8 x i32> [[TMP8]], <8 x i32> [[TMP6]]
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 undef, [[S_047]]
+; CHECK-NEXT:    [[ADD16:%.*]] = add nsw i32 [[ADD]], undef
+; CHECK-NEXT:    [[ADD27:%.*]] = add nsw i32 [[ADD16]], undef
+; CHECK-NEXT:    [[ADD38:%.*]] = add nsw i32 [[ADD27]], undef
+; CHECK-NEXT:    [[ADD49:%.*]] = add nsw i32 [[ADD38]], undef
+; CHECK-NEXT:    [[ADD60:%.*]] = add nsw i32 [[ADD49]], undef
+; CHECK-NEXT:    [[ADD71:%.*]] = add nsw i32 [[ADD60]], undef
  ; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP9]])
  ; CHECK-NEXT:    [[OP_EXTRA]] = add nsw i32 [[TMP10]], [[S_047]]
+; CHECK-NEXT:    [[ADD82:%.*]] = add nsw i32 [[ADD71]], undef
  ; CHECK-NEXT:    [[CMP83:%.*]] = icmp slt i32 [[OP_EXTRA]], [[LIM:%.*]]
  ; CHECK-NEXT:    br i1 [[CMP83]], label [[IF_END_86]], label [[FOR_END_LOOPEXIT:%.*]]
  ; CHECK:       if.end.86:
diff --git a/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll b/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll

index 90771e4eb3176e44f19c4d45776118d17b515ab4..98a9fd482e872c77173458aa883f57fd27d717c2 100644 (file)
--- a/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll
+++ b/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll
@@ -13,11 +13,11 @@ define void @patatino(i64 %n, i64 %i, %struct.S* %p) !dbg !7 {
  ; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 [[I:%.*]], metadata !19, metadata !DIExpression()), !dbg !24
  ; CHECK-NEXT:    call void @llvm.dbg.value(metadata %struct.S* [[P:%.*]], metadata !20, metadata !DIExpression()), !dbg !25
  ; CHECK-NEXT:    [[X1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[P]], i64 [[N]], i32 0, !dbg !26
-; CHECK-NEXT:    call void @llvm.dbg.value(metadata !2, metadata !21, metadata !DIExpression()), !dbg !27
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 undef, metadata !21, metadata !DIExpression()), !dbg !27
  ; CHECK-NEXT:    [[Y3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[N]], i32 1, !dbg !28
  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[X1]] to <2 x i64>*, !dbg !26
  ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8, !dbg !26, !tbaa !29
-; CHECK-NEXT:    call void @llvm.dbg.value(metadata !2, metadata !22, metadata !DIExpression()), !dbg !33
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 undef, metadata !22, metadata !DIExpression()), !dbg !33
  ; CHECK-NEXT:    [[X5:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[I]], i32 0, !dbg !34
  ; CHECK-NEXT:    [[Y7:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[I]], i32 1, !dbg !35
  ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[X5]] to <2 x i64>*, !dbg !36
diff --git a/test/Transforms/SLPVectorizer/X86/PR31847.ll b/test/Transforms/SLPVectorizer/X86/PR31847.ll

deleted file mode 100644 (file)

index 2fe3424..0000000
--- a/test/Transforms/SLPVectorizer/X86/PR31847.ll
+++ /dev/null
@@ -1,153 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -slp-vectorizer -S -o - -mtriple=i386 -mcpu=haswell < %s | FileCheck %s
-target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
-
-@shift = common local_unnamed_addr global [10 x i32] zeroinitializer, align 4
-@data = common local_unnamed_addr global [10 x i8*] zeroinitializer, align 4
-
-define void @flat(i32 %intensity) {
-; CHECK-LABEL: @flat(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 0), align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 1), align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 0), align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 1), align 4
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 1, [[TMP0]]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 [[SHR]]
-; CHECK-NEXT:    [[SHR1:%.*]] = lshr i32 1, [[TMP1]]
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP3]], i32 [[SHR1]]
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.cond.cleanup:
-; CHECK-NEXT:    ret void
-; CHECK:       for.body:
-; CHECK-NEXT:    [[D1_DATA_046:%.*]] = phi i8* [ [[TMP3]], [[ENTRY:%.*]] ], [ [[ADD_PTR23_1:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[Y_045:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_1:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
-; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP4]] to i32
-; CHECK-NEXT:    [[SUB:%.*]] = add nsw i32 [[CONV]], -128
-; CHECK-NEXT:    [[TMP5:%.*]] = load i8, i8* [[ARRAYIDX2]], align 1
-; CHECK-NEXT:    [[CONV3:%.*]] = zext i8 [[TMP5]] to i32
-; CHECK-NEXT:    [[SUB4:%.*]] = add nsw i32 [[CONV3]], -128
-; CHECK-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[SUB]], -1
-; CHECK-NEXT:    [[SUB7:%.*]] = sub nsw i32 128, [[CONV]]
-; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP5]], i32 [[SUB]], i32 [[SUB7]]
-; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[SUB4]], -1
-; CHECK-NEXT:    [[SUB12:%.*]] = sub nsw i32 128, [[CONV3]]
-; CHECK-NEXT:    [[COND14:%.*]] = select i1 [[CMP8]], i32 [[SUB4]], i32 [[SUB12]]
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[COND14]], [[COND]]
-; CHECK-NEXT:    [[IDX_NEG:%.*]] = sub nsw i32 0, [[ADD]]
-; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[IDX_NEG]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i8, i8* [[ADD_PTR]], align 1
-; CHECK-NEXT:    [[CONV15:%.*]] = zext i8 [[TMP6]] to i32
-; CHECK-NEXT:    [[ADD16:%.*]] = add nsw i32 [[CONV15]], [[INTENSITY:%.*]]
-; CHECK-NEXT:    [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8
-; CHECK-NEXT:    store i8 [[CONV17]], i8* [[ADD_PTR]], align 1
-; CHECK-NEXT:    [[ADD_PTR18:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[ADD]]
-; CHECK-NEXT:    [[TMP7:%.*]] = load i8, i8* [[ADD_PTR18]], align 1
-; CHECK-NEXT:    [[NOT_TOBOOL:%.*]] = icmp eq i8 [[TMP7]], 0
-; CHECK-NEXT:    [[CONV21:%.*]] = zext i1 [[NOT_TOBOOL]] to i8
-; CHECK-NEXT:    store i8 [[CONV21]], i8* [[ADD_PTR18]], align 1
-; CHECK-NEXT:    [[ADD_PTR23:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[TMP1]]
-; CHECK-NEXT:    [[TMP8:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
-; CHECK-NEXT:    [[CONV_1:%.*]] = zext i8 [[TMP8]] to i32
-; CHECK-NEXT:    [[SUB_1:%.*]] = add nsw i32 [[CONV_1]], -128
-; CHECK-NEXT:    [[TMP9:%.*]] = load i8, i8* [[ARRAYIDX2]], align 1
-; CHECK-NEXT:    [[CONV3_1:%.*]] = zext i8 [[TMP9]] to i32
-; CHECK-NEXT:    [[SUB4_1:%.*]] = add nsw i32 [[CONV3_1]], -128
-; CHECK-NEXT:    [[CMP5_1:%.*]] = icmp sgt i32 [[SUB_1]], -1
-; CHECK-NEXT:    [[SUB7_1:%.*]] = sub nsw i32 128, [[CONV_1]]
-; CHECK-NEXT:    [[COND_1:%.*]] = select i1 [[CMP5_1]], i32 [[SUB_1]], i32 [[SUB7_1]]
-; CHECK-NEXT:    [[CMP8_1:%.*]] = icmp sgt i32 [[SUB4_1]], -1
-; CHECK-NEXT:    [[SUB12_1:%.*]] = sub nsw i32 128, [[CONV3_1]]
-; CHECK-NEXT:    [[COND14_1:%.*]] = select i1 [[CMP8_1]], i32 [[SUB4_1]], i32 [[SUB12_1]]
-; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 [[COND14_1]], [[COND_1]]
-; CHECK-NEXT:    [[IDX_NEG_1:%.*]] = sub nsw i32 0, [[ADD_1]]
-; CHECK-NEXT:    [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[IDX_NEG_1]]
-; CHECK-NEXT:    [[TMP10:%.*]] = load i8, i8* [[ADD_PTR_1]], align 1
-; CHECK-NEXT:    [[CONV15_1:%.*]] = zext i8 [[TMP10]] to i32
-; CHECK-NEXT:    [[ADD16_1:%.*]] = add nsw i32 [[CONV15_1]], [[INTENSITY]]
-; CHECK-NEXT:    [[CONV17_1:%.*]] = trunc i32 [[ADD16_1]] to i8
-; CHECK-NEXT:    store i8 [[CONV17_1]], i8* [[ADD_PTR_1]], align 1
-; CHECK-NEXT:    [[ADD_PTR18_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[ADD_1]]
-; CHECK-NEXT:    [[TMP11:%.*]] = load i8, i8* [[ADD_PTR18_1]], align 1
-; CHECK-NEXT:    [[NOT_TOBOOL_1:%.*]] = icmp eq i8 [[TMP11]], 0
-; CHECK-NEXT:    [[CONV21_1:%.*]] = zext i1 [[NOT_TOBOOL_1]] to i8
-; CHECK-NEXT:    store i8 [[CONV21_1]], i8* [[ADD_PTR18_1]], align 1
-; CHECK-NEXT:    [[ADD_PTR23_1]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[TMP1]]
-; CHECK-NEXT:    [[INC_1]] = add nsw i32 [[Y_045]], 2
-; CHECK-NEXT:    [[EXITCOND_1:%.*]] = icmp eq i32 [[INC_1]], 128
-; CHECK-NEXT:    br i1 [[EXITCOND_1]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
-;
-entry:
-  %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 0), align 4
-  %1 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 1), align 4
-  %2 = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 0), align 4
-  %3 = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 1), align 4
-  %shr = lshr i32 1, %0
-  %arrayidx = getelementptr inbounds i8, i8* %2, i32 %shr
-  %shr1 = lshr i32 1, %1
-  %arrayidx2 = getelementptr inbounds i8, i8* %3, i32 %shr1
-  br label %for.body
-
-for.cond.cleanup:                                 ; preds = %for.body
-  ret void
-
-for.body:                                         ; preds = %for.body, %entry
-  %d1_data.046 = phi i8* [ %3, %entry ], [ %add.ptr23.1, %for.body ]
-  %y.045 = phi i32 [ 0, %entry ], [ %inc.1, %for.body ]
-  %4 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %4 to i32
-  %sub = add nsw i32 %conv, -128
-  %5 = load i8, i8* %arrayidx2, align 1
-  %conv3 = zext i8 %5 to i32
-  %sub4 = add nsw i32 %conv3, -128
-  %cmp5 = icmp sgt i32 %sub, -1
-  %sub7 = sub nsw i32 128, %conv
-  %cond = select i1 %cmp5, i32 %sub, i32 %sub7
-  %cmp8 = icmp sgt i32 %sub4, -1
-  %sub12 = sub nsw i32 128, %conv3
-  %cond14 = select i1 %cmp8, i32 %sub4, i32 %sub12
-  %add = add nsw i32 %cond14, %cond
-  %idx.neg = sub nsw i32 0, %add
-  %add.ptr = getelementptr inbounds i8, i8* %d1_data.046, i32 %idx.neg
-  %6 = load i8, i8* %add.ptr, align 1
-  %conv15 = zext i8 %6 to i32
-  %add16 = add nsw i32 %conv15, %intensity
-  %conv17 = trunc i32 %add16 to i8
-  store i8 %conv17, i8* %add.ptr, align 1
-  %add.ptr18 = getelementptr inbounds i8, i8* %d1_data.046, i32 %add
-  %7 = load i8, i8* %add.ptr18, align 1
-  %not.tobool = icmp eq i8 %7, 0
-  %conv21 = zext i1 %not.tobool to i8
-  store i8 %conv21, i8* %add.ptr18, align 1
-  %add.ptr23 = getelementptr inbounds i8, i8* %d1_data.046, i32 %1
-  %8 = load i8, i8* %arrayidx, align 1
-  %conv.1 = zext i8 %8 to i32
-  %sub.1 = add nsw i32 %conv.1, -128
-  %9 = load i8, i8* %arrayidx2, align 1
-  %conv3.1 = zext i8 %9 to i32
-  %sub4.1 = add nsw i32 %conv3.1, -128
-  %cmp5.1 = icmp sgt i32 %sub.1, -1
-  %sub7.1 = sub nsw i32 128, %conv.1
-  %cond.1 = select i1 %cmp5.1, i32 %sub.1, i32 %sub7.1
-  %cmp8.1 = icmp sgt i32 %sub4.1, -1
-  %sub12.1 = sub nsw i32 128, %conv3.1
-  %cond14.1 = select i1 %cmp8.1, i32 %sub4.1, i32 %sub12.1
-  %add.1 = add nsw i32 %cond14.1, %cond.1
-  %idx.neg.1 = sub nsw i32 0, %add.1
-  %add.ptr.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %idx.neg.1
-  %10 = load i8, i8* %add.ptr.1, align 1
-  %conv15.1 = zext i8 %10 to i32
-  %add16.1 = add nsw i32 %conv15.1, %intensity
-  %conv17.1 = trunc i32 %add16.1 to i8
-  store i8 %conv17.1, i8* %add.ptr.1, align 1
-  %add.ptr18.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %add.1
-  %11 = load i8, i8* %add.ptr18.1, align 1
-  %not.tobool.1 = icmp eq i8 %11, 0
-  %conv21.1 = zext i1 %not.tobool.1 to i8
-  store i8 %conv21.1, i8* %add.ptr18.1, align 1
-  %add.ptr23.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %1
-  %inc.1 = add nsw i32 %y.045, 2
-  %exitcond.1 = icmp eq i32 %inc.1, 128
-  br i1 %exitcond.1, label %for.cond.cleanup, label %for.body
-}
diff --git a/test/Transforms/SLPVectorizer/X86/PR35628_1.ll b/test/Transforms/SLPVectorizer/X86/PR35628_1.ll

index e033ed768ca056063caad60712b2e43c5979524f..625748f6820932f03ffeb4c867c4feed6d3183df 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/PR35628_1.ll
+++ b/test/Transforms/SLPVectorizer/X86/PR35628_1.ll
@@ -18,16 +18,23 @@ define void @mainTest(i32* %ptr) #0  {
  ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
  ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1
  ; CHECK-NEXT:    [[TMP8:%.*]] = mul <4 x i32> [[TMP4]], [[TMP4]]
-; CHECK-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP6]] to i64
+; CHECK-NEXT:    [[TMP9:%.*]] = add i32 1, undef
+; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP7]]
+; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[TMP10]], undef
+; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP11]], [[TMP6]]
+; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], undef
+; CHECK-NEXT:    [[TMP14:%.*]] = sext i32 [[TMP6]] to i64
+; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP13]], [[TMP5]]
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP8]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
-; CHECK-NEXT:    [[OP_EXTRA:%.*]] = add i32 [[TMP10]], 1
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
+; CHECK-NEXT:    [[OP_EXTRA:%.*]] = add i32 [[TMP16]], 1
  ; CHECK-NEXT:    [[OP_EXTRA3:%.*]] = add i32 [[OP_EXTRA]], [[TMP7]]
  ; CHECK-NEXT:    [[OP_EXTRA4:%.*]] = add i32 [[OP_EXTRA3]], [[TMP6]]
  ; CHECK-NEXT:    [[OP_EXTRA5]] = add i32 [[OP_EXTRA4]], [[TMP5]]
+; CHECK-NEXT:    [[TMP17:%.*]] = add i32 [[TMP15]], undef
  ; CHECK-NEXT:    br label [[LOOP]]
  ; CHECK:       bail_out:
  ; CHECK-NEXT:    ret void
diff --git a/test/Transforms/SLPVectorizer/X86/PR35628_2.ll b/test/Transforms/SLPVectorizer/X86/PR35628_2.ll

index 4916d283bf381d66e076a4af54118b62c134a330..712ff040a91860486ed9b0122b7654621d3e7832 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
+++ b/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
@@ -20,6 +20,10 @@ define void @test() #0 {
  ; CHECK-NEXT:    [[DUMMY_SHL:%.*]] = shl i64 [[TMP7]], 32
  ; CHECK-NEXT:    [[TMP8:%.*]] = add <4 x i64> <i64 1, i64 1, i64 1, i64 1>, [[TMP5]]
  ; CHECK-NEXT:    [[TMP9:%.*]] = ashr exact <4 x i64> [[TMP8]], <i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT:    [[SUM1:%.*]] = add i64 undef, undef
+; CHECK-NEXT:    [[SUM2:%.*]] = add i64 [[SUM1]], undef
+; CHECK-NEXT:    [[ZSUM:%.*]] = add i64 [[SUM2]], 0
+; CHECK-NEXT:    [[JOIN:%.*]] = add i64 [[TMP6]], [[ZSUM]]
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i64> [[TMP9]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]], <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
@@ -27,6 +31,7 @@ define void @test() #0 {
  ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i64> [[BIN_RDX2]], i32 0
  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = add i64 [[TMP10]], 0
  ; CHECK-NEXT:    [[OP_EXTRA3]] = add i64 [[OP_EXTRA]], [[TMP6]]
+; CHECK-NEXT:    [[LAST:%.*]] = add i64 [[JOIN]], undef
  ; CHECK-NEXT:    br label [[LOOP]]
  ;
  entry:
diff --git a/test/Transforms/SLPVectorizer/X86/PR39774.ll b/test/Transforms/SLPVectorizer/X86/PR39774.ll

index db02d64ba9382a1d5f035e6f016a60d2936028c6..24f75b32c5d2d0aa6975c957550b2c6bf26bd00e 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/PR39774.ll
+++ b/test/Transforms/SLPVectorizer/X86/PR39774.ll
@@ -11,6 +11,40 @@ define void @Test(i32) {
  ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1
  ; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i32> [[SHUFFLE]], <i32 0, i32 55, i32 285, i32 1240, i32 1496, i32 8555, i32 12529, i32 13685>
+; CHECK-NEXT:    [[VAL_1:%.*]] = and i32 [[TMP2]], undef
+; CHECK-NEXT:    [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]]
+; CHECK-NEXT:    [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_7:%.*]] = and i32 [[VAL_5]], undef
+; CHECK-NEXT:    [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_12:%.*]] = and i32 [[VAL_10]], undef
+; CHECK-NEXT:    [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_19:%.*]] = and i32 [[VAL_17]], undef
+; CHECK-NEXT:    [[VAL_21:%.*]] = and i32 [[VAL_19]], undef
+; CHECK-NEXT:    [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_35:%.*]] = and i32 [[VAL_33]], undef
+; CHECK-NEXT:    [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_40:%.*]] = and i32 [[VAL_38]], undef
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = and <8 x i32> [[TMP3]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -18,7 +52,7 @@ define void @Test(i32) {
  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
  ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
-; CHECK-NEXT:    [[OP_EXTRA:%.*]] = and i32 [[TMP4]], [[TMP0:%.*]]
+; CHECK-NEXT:    [[OP_EXTRA:%.*]] = and i32 [[TMP4]], [[TMP0]]
  ; CHECK-NEXT:    [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]]
  ; CHECK-NEXT:    [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]]
  ; CHECK-NEXT:    [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]]
@@ -45,6 +79,7 @@ define void @Test(i32) {
  ; CHECK-NEXT:    [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]]
  ; CHECK-NEXT:    [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP0]]
  ; CHECK-NEXT:    [[OP_EXTRA30:%.*]] = and i32 [[OP_EXTRA29]], [[TMP0]]
+; CHECK-NEXT:    [[VAL_42:%.*]] = and i32 [[VAL_40]], undef
  ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> undef, i32 [[OP_EXTRA30]], i32 0
  ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 14910, i32 1
  ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
@@ -66,8 +101,40 @@ define void @Test(i32) {
  ; FORCE_REDUCTION-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
  ; FORCE_REDUCTION-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1
  ; FORCE_REDUCTION-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[SHUFFLE]], <i32 0, i32 55, i32 285, i32 1240>
+; FORCE_REDUCTION-NEXT:    [[VAL_1:%.*]] = and i32 [[TMP2]], undef
+; FORCE_REDUCTION-NEXT:    [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]]
+; FORCE_REDUCTION-NEXT:    [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_7:%.*]] = and i32 [[VAL_5]], undef
+; FORCE_REDUCTION-NEXT:    [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_12:%.*]] = and i32 [[VAL_10]], undef
+; FORCE_REDUCTION-NEXT:    [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_19:%.*]] = and i32 [[VAL_17]], undef
  ; FORCE_REDUCTION-NEXT:    [[VAL_20:%.*]] = add i32 [[TMP2]], 1496
+; FORCE_REDUCTION-NEXT:    [[VAL_21:%.*]] = and i32 [[VAL_19]], [[VAL_20]]
+; FORCE_REDUCTION-NEXT:    [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
  ; FORCE_REDUCTION-NEXT:    [[VAL_34:%.*]] = add i32 [[TMP2]], 8555
+; FORCE_REDUCTION-NEXT:    [[VAL_35:%.*]] = and i32 [[VAL_33]], [[VAL_34]]
+; FORCE_REDUCTION-NEXT:    [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
+; FORCE_REDUCTION-NEXT:    [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
  ; FORCE_REDUCTION-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; FORCE_REDUCTION-NEXT:    [[BIN_RDX:%.*]] = and <4 x i32> [[TMP3]], [[RDX_SHUF]]
  ; FORCE_REDUCTION-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
@@ -75,7 +142,7 @@ define void @Test(i32) {
  ; FORCE_REDUCTION-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
  ; FORCE_REDUCTION-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], [[VAL_20]]
  ; FORCE_REDUCTION-NEXT:    [[TMP6:%.*]] = and i32 [[TMP5]], [[VAL_34]]
-; FORCE_REDUCTION-NEXT:    [[OP_EXTRA:%.*]] = and i32 [[TMP6]], [[TMP0:%.*]]
+; FORCE_REDUCTION-NEXT:    [[OP_EXTRA:%.*]] = and i32 [[TMP6]], [[TMP0]]
  ; FORCE_REDUCTION-NEXT:    [[OP_EXTRA3:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]]
  ; FORCE_REDUCTION-NEXT:    [[OP_EXTRA4:%.*]] = and i32 [[OP_EXTRA3]], [[TMP0]]
  ; FORCE_REDUCTION-NEXT:    [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA4]], [[TMP0]]
@@ -103,6 +170,7 @@ define void @Test(i32) {
  ; FORCE_REDUCTION-NEXT:    [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]], [[TMP0]]
  ; FORCE_REDUCTION-NEXT:    [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]]
  ; FORCE_REDUCTION-NEXT:    [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP2]]
+; FORCE_REDUCTION-NEXT:    [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
  ; FORCE_REDUCTION-NEXT:    [[VAL_39:%.*]] = add i32 [[TMP2]], 12529
  ; FORCE_REDUCTION-NEXT:    [[VAL_40:%.*]] = and i32 [[OP_EXTRA29]], [[VAL_39]]
  ; FORCE_REDUCTION-NEXT:    [[VAL_41:%.*]] = add i32 [[TMP2]], 13685
diff --git a/test/Transforms/SLPVectorizer/X86/PR40310.ll b/test/Transforms/SLPVectorizer/X86/PR40310.ll

index 4622c77e8c87403849bc96910c2653ef5b34ab6b..2a0b66ee28174666624127b0eeaa0c5097e979d1 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/PR40310.ll
+++ b/test/Transforms/SLPVectorizer/X86/PR40310.ll
@@ -13,6 +13,21 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) {
  ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 15
  ; CHECK-NEXT:    store atomic i32 [[TMP3]], i32* [[VALS:%.*]] unordered, align 4
  ; CHECK-NEXT:    [[TMP4:%.*]] = add <16 x i32> [[SHUFFLE]], <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 -1>
+; CHECK-NEXT:    [[V14:%.*]] = and i32 [[TMP2]], undef
+; CHECK-NEXT:    [[V16:%.*]] = and i32 undef, [[V14]]
+; CHECK-NEXT:    [[V18:%.*]] = and i32 undef, [[V16]]
+; CHECK-NEXT:    [[V20:%.*]] = and i32 undef, [[V18]]
+; CHECK-NEXT:    [[V22:%.*]] = and i32 undef, [[V20]]
+; CHECK-NEXT:    [[V24:%.*]] = and i32 undef, [[V22]]
+; CHECK-NEXT:    [[V26:%.*]] = and i32 undef, [[V24]]
+; CHECK-NEXT:    [[V28:%.*]] = and i32 undef, [[V26]]
+; CHECK-NEXT:    [[V30:%.*]] = and i32 undef, [[V28]]
+; CHECK-NEXT:    [[V32:%.*]] = and i32 undef, [[V30]]
+; CHECK-NEXT:    [[V34:%.*]] = and i32 undef, [[V32]]
+; CHECK-NEXT:    [[V36:%.*]] = and i32 undef, [[V34]]
+; CHECK-NEXT:    [[V38:%.*]] = and i32 undef, [[V36]]
+; CHECK-NEXT:    [[V40:%.*]] = and i32 undef, [[V38]]
+; CHECK-NEXT:    [[V42:%.*]] = and i32 undef, [[V40]]
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = and <16 x i32> [[TMP4]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[BIN_RDX]], <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -23,6 +38,7 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) {
  ; CHECK-NEXT:    [[BIN_RDX6:%.*]] = and <16 x i32> [[BIN_RDX4]], [[RDX_SHUF5]]
  ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <16 x i32> [[BIN_RDX6]], i32 0
  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = and i32 [[TMP5]], [[TMP2]]
+; CHECK-NEXT:    [[V43:%.*]] = and i32 undef, [[V42]]
  ; CHECK-NEXT:    [[V44:%.*]] = add i32 [[TMP2]], 16
  ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> undef, i32 [[V44]], i32 0
  ; CHECK-NEXT:    [[TMP7]] = insertelement <2 x i32> [[TMP6]], i32 [[OP_EXTRA]], i32 1
diff --git a/test/Transforms/SLPVectorizer/X86/bad-reduction.ll b/test/Transforms/SLPVectorizer/X86/bad-reduction.ll

index e3452e194dbfbf460e5196a4499321092fecd271..8637df4acc128edef07730512553d4403cbd77a9 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/bad-reduction.ll
+++ b/test/Transforms/SLPVectorizer/X86/bad-reduction.ll
@@ -30,6 +30,12 @@ define i64 @load_bswap(%v8i8* %p) {
  ; CHECK-NEXT:    [[SH4:%.*]] = shl nuw nsw i64 [[Z4]], 24
  ; CHECK-NEXT:    [[SH5:%.*]] = shl nuw nsw i64 [[Z5]], 16
  ; CHECK-NEXT:    [[SH6:%.*]] = shl nuw nsw i64 [[Z6]], 8
+; CHECK-NEXT:    [[OR01:%.*]] = or i64 undef, undef
+; CHECK-NEXT:    [[OR012:%.*]] = or i64 [[OR01]], undef
+; CHECK-NEXT:    [[OR0123:%.*]] = or i64 [[OR012]], undef
+; CHECK-NEXT:    [[OR01234:%.*]] = or i64 [[OR0123]], [[SH4]]
+; CHECK-NEXT:    [[OR012345:%.*]] = or i64 [[OR01234]], [[SH5]]
+; CHECK-NEXT:    [[OR0123456:%.*]] = or i64 [[OR012345]], [[SH6]]
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <4 x i64> [[TMP4]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]], <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
@@ -39,6 +45,7 @@ define i64 @load_bswap(%v8i8* %p) {
  ; CHECK-NEXT:    [[TMP7:%.*]] = or i64 [[TMP6]], [[SH5]]
  ; CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP7]], [[SH6]]
  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = or i64 [[TMP8]], [[Z7]]
+; CHECK-NEXT:    [[OR01234567:%.*]] = or i64 [[OR0123456]], [[Z7]]
  ; CHECK-NEXT:    ret i64 [[OP_EXTRA]]
  ;
    %g0 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 0
@@ -101,6 +108,12 @@ define i64 @load_bswap_nop_shift(%v8i8* %p) {
  ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
  ; CHECK-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i64>
  ; CHECK-NEXT:    [[TMP4:%.*]] = shl nuw <8 x i64> [[TMP3]], <i64 56, i64 48, i64 40, i64 32, i64 24, i64 16, i64 8, i64 0>
+; CHECK-NEXT:    [[OR01:%.*]] = or i64 undef, undef
+; CHECK-NEXT:    [[OR012:%.*]] = or i64 [[OR01]], undef
+; CHECK-NEXT:    [[OR0123:%.*]] = or i64 [[OR012]], undef
+; CHECK-NEXT:    [[OR01234:%.*]] = or i64 [[OR0123]], undef
+; CHECK-NEXT:    [[OR012345:%.*]] = or i64 [[OR01234]], undef
+; CHECK-NEXT:    [[OR0123456:%.*]] = or i64 [[OR012345]], undef
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <8 x i64> [[TMP4]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i64> [[BIN_RDX]], <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -108,6 +121,7 @@ define i64 @load_bswap_nop_shift(%v8i8* %p) {
  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i64> [[BIN_RDX2]], <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = or <8 x i64> [[BIN_RDX2]], [[RDX_SHUF3]]
  ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i64> [[BIN_RDX4]], i32 0
+; CHECK-NEXT:    [[OR01234567:%.*]] = or i64 [[OR0123456]], undef
  ; CHECK-NEXT:    ret i64 [[TMP5]]
  ;
    %g0 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 0
@@ -182,6 +196,12 @@ define i64 @load64le(i8* %arg) {
  ; CHECK-NEXT:    [[S5:%.*]] = shl nuw nsw i64 [[Z5]], 40
  ; CHECK-NEXT:    [[S6:%.*]] = shl nuw nsw i64 [[Z6]], 48
  ; CHECK-NEXT:    [[S7:%.*]] = shl nuw i64 [[Z7]], 56
+; CHECK-NEXT:    [[O1:%.*]] = or i64 undef, [[Z0]]
+; CHECK-NEXT:    [[O2:%.*]] = or i64 [[O1]], undef
+; CHECK-NEXT:    [[O3:%.*]] = or i64 [[O2]], undef
+; CHECK-NEXT:    [[O4:%.*]] = or i64 [[O3]], undef
+; CHECK-NEXT:    [[O5:%.*]] = or i64 [[O4]], [[S5]]
+; CHECK-NEXT:    [[O6:%.*]] = or i64 [[O5]], [[S6]]
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <4 x i64> [[TMP4]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]], <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
@@ -191,6 +211,7 @@ define i64 @load64le(i8* %arg) {
  ; CHECK-NEXT:    [[TMP7:%.*]] = or i64 [[TMP6]], [[S6]]
  ; CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP7]], [[S7]]
  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = or i64 [[TMP8]], [[Z0]]
+; CHECK-NEXT:    [[O7:%.*]] = or i64 [[O6]], [[S7]]
  ; CHECK-NEXT:    ret i64 [[OP_EXTRA]]
  ;
    %g1 = getelementptr inbounds i8, i8* %arg, i64 1
@@ -251,6 +272,12 @@ define i64 @load64le_nop_shift(i8* %arg) {
  ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
  ; CHECK-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i64>
  ; CHECK-NEXT:    [[TMP4:%.*]] = shl nuw <8 x i64> [[TMP3]], <i64 0, i64 8, i64 16, i64 24, i64 32, i64 40, i64 48, i64 56>
+; CHECK-NEXT:    [[O1:%.*]] = or i64 undef, undef
+; CHECK-NEXT:    [[O2:%.*]] = or i64 [[O1]], undef
+; CHECK-NEXT:    [[O3:%.*]] = or i64 [[O2]], undef
+; CHECK-NEXT:    [[O4:%.*]] = or i64 [[O3]], undef
+; CHECK-NEXT:    [[O5:%.*]] = or i64 [[O4]], undef
+; CHECK-NEXT:    [[O6:%.*]] = or i64 [[O5]], undef
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <8 x i64> [[TMP4]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i64> [[BIN_RDX]], <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -258,6 +285,7 @@ define i64 @load64le_nop_shift(i8* %arg) {
  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i64> [[BIN_RDX2]], <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = or <8 x i64> [[BIN_RDX2]], [[RDX_SHUF3]]
  ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i64> [[BIN_RDX4]], i32 0
+; CHECK-NEXT:    [[O7:%.*]] = or i64 [[O6]], undef
  ; CHECK-NEXT:    ret i64 [[TMP5]]
  ;
    %g1 = getelementptr inbounds i8, i8* %arg, i64 1
diff --git a/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/test/Transforms/SLPVectorizer/X86/horizontal-list.ll

index 1a2c66c6879abf7b777231d5b609782b4a964fd5..6c3994b0a22b39afb6dba184d125ca27f330e40a 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
+++ b/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
@@ -100,8 +100,16 @@ define float @bazz() {
  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr to <8 x float>*), align 16
  ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16
  ; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float undef, [[CONV]]
+; CHECK-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
+; CHECK-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
+; CHECK-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
  ; CHECK-NEXT:    [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
  ; CHECK-NEXT:    [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
+; CHECK-NEXT:    [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]]
+; CHECK-NEXT:    [[ADD19:%.*]] = fadd fast float undef, [[ADD7]]
+; CHECK-NEXT:    [[ADD19_1:%.*]] = fadd fast float undef, [[ADD19]]
+; CHECK-NEXT:    [[ADD19_2:%.*]] = fadd fast float undef, [[ADD19_1]]
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP3]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -111,6 +119,7 @@ define float @bazz() {
  ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]]
  ; CHECK-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV6]]
+; CHECK-NEXT:    [[ADD19_3:%.*]] = fadd fast float undef, [[ADD19_2]]
  ; CHECK-NEXT:    store float [[OP_EXTRA5]], float* @res, align 4
  ; CHECK-NEXT:    ret float [[OP_EXTRA5]]
  ;
@@ -122,8 +131,16 @@ define float @bazz() {
  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr to <8 x float>*), align 16
  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16
  ; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
+; THRESHOLD-NEXT:    [[ADD:%.*]] = fadd fast float undef, [[CONV]]
+; THRESHOLD-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
+; THRESHOLD-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
+; THRESHOLD-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
  ; THRESHOLD-NEXT:    [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
  ; THRESHOLD-NEXT:    [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
+; THRESHOLD-NEXT:    [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]]
+; THRESHOLD-NEXT:    [[ADD19:%.*]] = fadd fast float undef, [[ADD7]]
+; THRESHOLD-NEXT:    [[ADD19_1:%.*]] = fadd fast float undef, [[ADD19]]
+; THRESHOLD-NEXT:    [[ADD19_2:%.*]] = fadd fast float undef, [[ADD19_1]]
  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP3]], [[RDX_SHUF]]
  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -133,6 +150,7 @@ define float @bazz() {
  ; THRESHOLD-NEXT:    [[TMP4:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
  ; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]]
  ; THRESHOLD-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV6]]
+; THRESHOLD-NEXT:    [[ADD19_3:%.*]] = fadd fast float undef, [[ADD19_2]]
  ; THRESHOLD-NEXT:    store float [[OP_EXTRA5]], float* @res, align 4
  ; THRESHOLD-NEXT:    ret float [[OP_EXTRA5]]
  ;
@@ -187,14 +205,17 @@ define float @bazzz() {
  ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
  ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
  ; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd fast float undef, undef
+; CHECK-NEXT:    [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
-; CHECK-NEXT:    store float [[TMP5]], float* @res, align 4
-; CHECK-NEXT:    ret float [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
+; CHECK-NEXT:    [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
+; CHECK-NEXT:    store float [[TMP8]], float* @res, align 4
+; CHECK-NEXT:    ret float [[TMP8]]
  ;
  ; THRESHOLD-LABEL: @bazzz(
  ; THRESHOLD-NEXT:  entry:
@@ -203,14 +224,17 @@ define float @bazzz() {
  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
  ; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
+; THRESHOLD-NEXT:    [[TMP4:%.*]] = fadd fast float undef, undef
+; THRESHOLD-NEXT:    [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ; THRESHOLD-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
-; THRESHOLD-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
-; THRESHOLD-NEXT:    [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
-; THRESHOLD-NEXT:    store float [[TMP5]], float* @res, align 4
-; THRESHOLD-NEXT:    ret float [[TMP5]]
+; THRESHOLD-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
+; THRESHOLD-NEXT:    [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
+; THRESHOLD-NEXT:    [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
+; THRESHOLD-NEXT:    store float [[TMP8]], float* @res, align 4
+; THRESHOLD-NEXT:    ret float [[TMP8]]
  ;
  entry:
    %0 = load i32, i32* @n, align 4
@@ -243,13 +267,16 @@ define i32 @foo() {
  ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
  ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
  ; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd fast float undef, undef
+; CHECK-NEXT:    [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
-; CHECK-NEXT:    [[CONV4:%.*]] = fptosi float [[TMP5]] to i32
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
+; CHECK-NEXT:    [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
+; CHECK-NEXT:    [[CONV4:%.*]] = fptosi float [[TMP8]] to i32
  ; CHECK-NEXT:    store i32 [[CONV4]], i32* @n, align 4
  ; CHECK-NEXT:    ret i32 [[CONV4]]
  ;
@@ -260,13 +287,16 @@ define i32 @foo() {
  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
  ; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
+; THRESHOLD-NEXT:    [[TMP4:%.*]] = fadd fast float undef, undef
+; THRESHOLD-NEXT:    [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ; THRESHOLD-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
-; THRESHOLD-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
-; THRESHOLD-NEXT:    [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
-; THRESHOLD-NEXT:    [[CONV4:%.*]] = fptosi float [[TMP5]] to i32
+; THRESHOLD-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
+; THRESHOLD-NEXT:    [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
+; THRESHOLD-NEXT:    [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
+; THRESHOLD-NEXT:    [[CONV4:%.*]] = fptosi float [[TMP8]] to i32
  ; THRESHOLD-NEXT:    store i32 [[CONV4]], i32* @n, align 4
  ; THRESHOLD-NEXT:    ret i32 [[CONV4]]
  ;
@@ -300,6 +330,11 @@ define float @bar() {
  ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
  ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
  ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    [[CMP4:%.*]] = fcmp fast ogt float undef, undef
+; CHECK-NEXT:    [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float undef, float undef
+; CHECK-NEXT:    [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], undef
+; CHECK-NEXT:    [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float undef
+; CHECK-NEXT:    [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], undef
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float> [[TMP2]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP2]], <4 x float> [[RDX_SHUF]]
@@ -307,6 +342,7 @@ define float @bar() {
  ; CHECK-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF1]]
  ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT3]], i32 0
+; CHECK-NEXT:    [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float undef
  ; CHECK-NEXT:    store float [[TMP3]], float* @res, align 4
  ; CHECK-NEXT:    ret float [[TMP3]]
  ;
@@ -315,6 +351,11 @@ define float @bar() {
  ; THRESHOLD-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]]
+; THRESHOLD-NEXT:    [[CMP4:%.*]] = fcmp fast ogt float undef, undef
+; THRESHOLD-NEXT:    [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float undef, float undef
+; THRESHOLD-NEXT:    [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], undef
+; THRESHOLD-NEXT:    [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float undef
+; THRESHOLD-NEXT:    [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], undef
  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; THRESHOLD-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float> [[TMP2]], [[RDX_SHUF]]
  ; THRESHOLD-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP2]], <4 x float> [[RDX_SHUF]]
@@ -322,6 +363,7 @@ define float @bar() {
  ; THRESHOLD-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
  ; THRESHOLD-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF1]]
  ; THRESHOLD-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT3]], i32 0
+; THRESHOLD-NEXT:    [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float undef
  ; THRESHOLD-NEXT:    store float [[TMP3]], float* @res, align 4
  ; THRESHOLD-NEXT:    ret float [[TMP3]]
  ;
@@ -368,6 +410,21 @@ define float @f(float* nocapture readonly %x) {
  ; CHECK-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>*
  ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, undef
+; CHECK-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
+; CHECK-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
+; CHECK-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
+; CHECK-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
+; CHECK-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
+; CHECK-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
+; CHECK-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
+; CHECK-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
+; CHECK-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
+; CHECK-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
+; CHECK-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
+; CHECK-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
+; CHECK-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
+; CHECK-NEXT:    [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
  ; CHECK-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
  ; CHECK-NEXT:    [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
  ; CHECK-NEXT:    [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18
@@ -402,6 +459,37 @@ define float @f(float* nocapture readonly %x) {
  ; CHECK-NEXT:    [[ARRAYIDX_47:%.*]] = getelementptr inbounds float, float* [[X]], i64 47
  ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>*
  ; CHECK-NEXT:    [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
+; CHECK-NEXT:    [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
+; CHECK-NEXT:    [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
+; CHECK-NEXT:    [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
+; CHECK-NEXT:    [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
+; CHECK-NEXT:    [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
+; CHECK-NEXT:    [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
+; CHECK-NEXT:    [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
+; CHECK-NEXT:    [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
+; CHECK-NEXT:    [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
+; CHECK-NEXT:    [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
+; CHECK-NEXT:    [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
+; CHECK-NEXT:    [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
+; CHECK-NEXT:    [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
+; CHECK-NEXT:    [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
+; CHECK-NEXT:    [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
+; CHECK-NEXT:    [[ADD_32:%.*]] = fadd fast float undef, [[ADD_31]]
+; CHECK-NEXT:    [[ADD_33:%.*]] = fadd fast float undef, [[ADD_32]]
+; CHECK-NEXT:    [[ADD_34:%.*]] = fadd fast float undef, [[ADD_33]]
+; CHECK-NEXT:    [[ADD_35:%.*]] = fadd fast float undef, [[ADD_34]]
+; CHECK-NEXT:    [[ADD_36:%.*]] = fadd fast float undef, [[ADD_35]]
+; CHECK-NEXT:    [[ADD_37:%.*]] = fadd fast float undef, [[ADD_36]]
+; CHECK-NEXT:    [[ADD_38:%.*]] = fadd fast float undef, [[ADD_37]]
+; CHECK-NEXT:    [[ADD_39:%.*]] = fadd fast float undef, [[ADD_38]]
+; CHECK-NEXT:    [[ADD_40:%.*]] = fadd fast float undef, [[ADD_39]]
+; CHECK-NEXT:    [[ADD_41:%.*]] = fadd fast float undef, [[ADD_40]]
+; CHECK-NEXT:    [[ADD_42:%.*]] = fadd fast float undef, [[ADD_41]]
+; CHECK-NEXT:    [[ADD_43:%.*]] = fadd fast float undef, [[ADD_42]]
+; CHECK-NEXT:    [[ADD_44:%.*]] = fadd fast float undef, [[ADD_43]]
+; CHECK-NEXT:    [[ADD_45:%.*]] = fadd fast float undef, [[ADD_44]]
+; CHECK-NEXT:    [[ADD_46:%.*]] = fadd fast float undef, [[ADD_45]]
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP3]], <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP3]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -423,6 +511,7 @@ define float @f(float* nocapture readonly %x) {
  ; CHECK-NEXT:    [[BIN_RDX16:%.*]] = fadd fast <16 x float> [[BIN_RDX14]], [[RDX_SHUF15]]
  ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <16 x float> [[BIN_RDX16]], i32 0
  ; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[ADD_47:%.*]] = fadd fast float undef, [[ADD_46]]
  ; CHECK-NEXT:    ret float [[OP_RDX]]
  ;
  ; THRESHOLD-LABEL: @f(
@@ -444,6 +533,21 @@ define float @f(float* nocapture readonly %x) {
  ; THRESHOLD-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
  ; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>*
  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4
+; THRESHOLD-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, undef
+; THRESHOLD-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
+; THRESHOLD-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
+; THRESHOLD-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
+; THRESHOLD-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
+; THRESHOLD-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
+; THRESHOLD-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
+; THRESHOLD-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
+; THRESHOLD-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
+; THRESHOLD-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
+; THRESHOLD-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
+; THRESHOLD-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
+; THRESHOLD-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
+; THRESHOLD-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
+; THRESHOLD-NEXT:    [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
  ; THRESHOLD-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
  ; THRESHOLD-NEXT:    [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
  ; THRESHOLD-NEXT:    [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18
@@ -478,6 +582,37 @@ define float @f(float* nocapture readonly %x) {
  ; THRESHOLD-NEXT:    [[ARRAYIDX_47:%.*]] = getelementptr inbounds float, float* [[X]], i64 47
  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>*
  ; THRESHOLD-NEXT:    [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4
+; THRESHOLD-NEXT:    [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
+; THRESHOLD-NEXT:    [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
+; THRESHOLD-NEXT:    [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
+; THRESHOLD-NEXT:    [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
+; THRESHOLD-NEXT:    [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
+; THRESHOLD-NEXT:    [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
+; THRESHOLD-NEXT:    [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
+; THRESHOLD-NEXT:    [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
+; THRESHOLD-NEXT:    [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
+; THRESHOLD-NEXT:    [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
+; THRESHOLD-NEXT:    [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
+; THRESHOLD-NEXT:    [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
+; THRESHOLD-NEXT:    [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
+; THRESHOLD-NEXT:    [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
+; THRESHOLD-NEXT:    [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
+; THRESHOLD-NEXT:    [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
+; THRESHOLD-NEXT:    [[ADD_32:%.*]] = fadd fast float undef, [[ADD_31]]
+; THRESHOLD-NEXT:    [[ADD_33:%.*]] = fadd fast float undef, [[ADD_32]]
+; THRESHOLD-NEXT:    [[ADD_34:%.*]] = fadd fast float undef, [[ADD_33]]
+; THRESHOLD-NEXT:    [[ADD_35:%.*]] = fadd fast float undef, [[ADD_34]]
+; THRESHOLD-NEXT:    [[ADD_36:%.*]] = fadd fast float undef, [[ADD_35]]
+; THRESHOLD-NEXT:    [[ADD_37:%.*]] = fadd fast float undef, [[ADD_36]]
+; THRESHOLD-NEXT:    [[ADD_38:%.*]] = fadd fast float undef, [[ADD_37]]
+; THRESHOLD-NEXT:    [[ADD_39:%.*]] = fadd fast float undef, [[ADD_38]]
+; THRESHOLD-NEXT:    [[ADD_40:%.*]] = fadd fast float undef, [[ADD_39]]
+; THRESHOLD-NEXT:    [[ADD_41:%.*]] = fadd fast float undef, [[ADD_40]]
+; THRESHOLD-NEXT:    [[ADD_42:%.*]] = fadd fast float undef, [[ADD_41]]
+; THRESHOLD-NEXT:    [[ADD_43:%.*]] = fadd fast float undef, [[ADD_42]]
+; THRESHOLD-NEXT:    [[ADD_44:%.*]] = fadd fast float undef, [[ADD_43]]
+; THRESHOLD-NEXT:    [[ADD_45:%.*]] = fadd fast float undef, [[ADD_44]]
+; THRESHOLD-NEXT:    [[ADD_46:%.*]] = fadd fast float undef, [[ADD_45]]
  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP3]], <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP3]], [[RDX_SHUF]]
  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -499,6 +634,7 @@ define float @f(float* nocapture readonly %x) {
  ; THRESHOLD-NEXT:    [[BIN_RDX16:%.*]] = fadd fast <16 x float> [[BIN_RDX14]], [[RDX_SHUF15]]
  ; THRESHOLD-NEXT:    [[TMP5:%.*]] = extractelement <16 x float> [[BIN_RDX16]], i32 0
  ; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
+; THRESHOLD-NEXT:    [[ADD_47:%.*]] = fadd fast float undef, [[ADD_46]]
  ; THRESHOLD-NEXT:    ret float [[OP_RDX]]
  ;
    entry:
@@ -685,6 +821,37 @@ define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) {
  ; CHECK-NEXT:    [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31
  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>*
  ; CHECK-NEXT:    [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float undef, [[CONV]]
+; CHECK-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
+; CHECK-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
+; CHECK-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
+; CHECK-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
+; CHECK-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
+; CHECK-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
+; CHECK-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
+; CHECK-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
+; CHECK-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
+; CHECK-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
+; CHECK-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
+; CHECK-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
+; CHECK-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
+; CHECK-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
+; CHECK-NEXT:    [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
+; CHECK-NEXT:    [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
+; CHECK-NEXT:    [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
+; CHECK-NEXT:    [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
+; CHECK-NEXT:    [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
+; CHECK-NEXT:    [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
+; CHECK-NEXT:    [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
+; CHECK-NEXT:    [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
+; CHECK-NEXT:    [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
+; CHECK-NEXT:    [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
+; CHECK-NEXT:    [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
+; CHECK-NEXT:    [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
+; CHECK-NEXT:    [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
+; CHECK-NEXT:    [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
+; CHECK-NEXT:    [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
+; CHECK-NEXT:    [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP1]], <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP1]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -697,6 +864,7 @@ define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) {
  ; CHECK-NEXT:    [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]]
  ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0
  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]]
+; CHECK-NEXT:    [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
  ; CHECK-NEXT:    ret float [[OP_EXTRA]]
  ;
  ; THRESHOLD-LABEL: @f1(
@@ -736,6 +904,37 @@ define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) {
  ; THRESHOLD-NEXT:    [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31
  ; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>*
  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4
+; THRESHOLD-NEXT:    [[ADD:%.*]] = fadd fast float undef, [[CONV]]
+; THRESHOLD-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
+; THRESHOLD-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
+; THRESHOLD-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
+; THRESHOLD-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
+; THRESHOLD-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
+; THRESHOLD-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
+; THRESHOLD-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
+; THRESHOLD-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
+; THRESHOLD-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
+; THRESHOLD-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
+; THRESHOLD-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
+; THRESHOLD-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
+; THRESHOLD-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
+; THRESHOLD-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
+; THRESHOLD-NEXT:    [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
+; THRESHOLD-NEXT:    [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
+; THRESHOLD-NEXT:    [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
+; THRESHOLD-NEXT:    [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
+; THRESHOLD-NEXT:    [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
+; THRESHOLD-NEXT:    [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
+; THRESHOLD-NEXT:    [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
+; THRESHOLD-NEXT:    [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
+; THRESHOLD-NEXT:    [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
+; THRESHOLD-NEXT:    [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
+; THRESHOLD-NEXT:    [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
+; THRESHOLD-NEXT:    [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
+; THRESHOLD-NEXT:    [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
+; THRESHOLD-NEXT:    [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
+; THRESHOLD-NEXT:    [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
+; THRESHOLD-NEXT:    [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP1]], <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP1]], [[RDX_SHUF]]
  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -748,6 +947,7 @@ define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) {
  ; THRESHOLD-NEXT:    [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]]
  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0
  ; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]]
+; THRESHOLD-NEXT:    [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
  ; THRESHOLD-NEXT:    ret float [[OP_EXTRA]]
  ;
    entry:
@@ -858,12 +1058,17 @@ define float @loadadd31(float* nocapture readonly %x) {
  ; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4
  ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
  ; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[ADD_1:%.*]] = fadd fast float [[TMP1]], [[TMP0]]
  ; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
  ; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
  ; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
  ; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
  ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>*
  ; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
+; CHECK-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
+; CHECK-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
+; CHECK-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
  ; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
  ; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 8
  ; CHECK-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 9
@@ -874,6 +1079,14 @@ define float @loadadd31(float* nocapture readonly %x) {
  ; CHECK-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 14
  ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>*
  ; CHECK-NEXT:    [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4
+; CHECK-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
+; CHECK-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
+; CHECK-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
+; CHECK-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
+; CHECK-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
+; CHECK-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
+; CHECK-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
+; CHECK-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
  ; CHECK-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
  ; CHECK-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
  ; CHECK-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
@@ -892,6 +1105,21 @@ define float @loadadd31(float* nocapture readonly %x) {
  ; CHECK-NEXT:    [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
  ; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>*
  ; CHECK-NEXT:    [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4
+; CHECK-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
+; CHECK-NEXT:    [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
+; CHECK-NEXT:    [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
+; CHECK-NEXT:    [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
+; CHECK-NEXT:    [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
+; CHECK-NEXT:    [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
+; CHECK-NEXT:    [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
+; CHECK-NEXT:    [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
+; CHECK-NEXT:    [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
+; CHECK-NEXT:    [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
+; CHECK-NEXT:    [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
+; CHECK-NEXT:    [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
+; CHECK-NEXT:    [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
+; CHECK-NEXT:    [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
+; CHECK-NEXT:    [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP7]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[BIN_RDX]], <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -917,6 +1145,7 @@ define float @loadadd31(float* nocapture readonly %x) {
  ; CHECK-NEXT:    [[OP_RDX17:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]]
  ; CHECK-NEXT:    [[TMP11:%.*]] = fadd fast float [[OP_RDX17]], [[TMP1]]
  ; CHECK-NEXT:    [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]]
+; CHECK-NEXT:    [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
  ; CHECK-NEXT:    ret float [[TMP12]]
  ;
  ; THRESHOLD-LABEL: @loadadd31(
@@ -925,12 +1154,17 @@ define float @loadadd31(float* nocapture readonly %x) {
  ; THRESHOLD-NEXT:    [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4
  ; THRESHOLD-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4
+; THRESHOLD-NEXT:    [[ADD_1:%.*]] = fadd fast float [[TMP1]], [[TMP0]]
  ; THRESHOLD-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
  ; THRESHOLD-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
  ; THRESHOLD-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
  ; THRESHOLD-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>*
  ; THRESHOLD-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
+; THRESHOLD-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
+; THRESHOLD-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
+; THRESHOLD-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
+; THRESHOLD-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
  ; THRESHOLD-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
  ; THRESHOLD-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 8
  ; THRESHOLD-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 9
@@ -941,6 +1175,14 @@ define float @loadadd31(float* nocapture readonly %x) {
  ; THRESHOLD-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 14
  ; THRESHOLD-NEXT:    [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>*
  ; THRESHOLD-NEXT:    [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4
+; THRESHOLD-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
+; THRESHOLD-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
+; THRESHOLD-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
+; THRESHOLD-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
+; THRESHOLD-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
+; THRESHOLD-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
+; THRESHOLD-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
+; THRESHOLD-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
  ; THRESHOLD-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
  ; THRESHOLD-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
  ; THRESHOLD-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
@@ -959,6 +1201,21 @@ define float @loadadd31(float* nocapture readonly %x) {
  ; THRESHOLD-NEXT:    [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
  ; THRESHOLD-NEXT:    [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>*
  ; THRESHOLD-NEXT:    [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4
+; THRESHOLD-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
+; THRESHOLD-NEXT:    [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
+; THRESHOLD-NEXT:    [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
+; THRESHOLD-NEXT:    [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
+; THRESHOLD-NEXT:    [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
+; THRESHOLD-NEXT:    [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
+; THRESHOLD-NEXT:    [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
+; THRESHOLD-NEXT:    [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
+; THRESHOLD-NEXT:    [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
+; THRESHOLD-NEXT:    [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
+; THRESHOLD-NEXT:    [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
+; THRESHOLD-NEXT:    [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
+; THRESHOLD-NEXT:    [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
+; THRESHOLD-NEXT:    [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
+; THRESHOLD-NEXT:    [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP7]], [[RDX_SHUF]]
  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[BIN_RDX]], <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -984,6 +1241,7 @@ define float @loadadd31(float* nocapture readonly %x) {
  ; THRESHOLD-NEXT:    [[OP_RDX17:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]]
  ; THRESHOLD-NEXT:    [[TMP11:%.*]] = fadd fast float [[OP_RDX17]], [[TMP1]]
  ; THRESHOLD-NEXT:    [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]]
+; THRESHOLD-NEXT:    [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
  ; THRESHOLD-NEXT:    ret float [[TMP12]]
  ;
    entry:
@@ -1094,6 +1352,14 @@ define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) {
  ; CHECK-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
+; CHECK-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
+; CHECK-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD4]], [[CONV]]
+; CHECK-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
+; CHECK-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
+; CHECK-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
+; CHECK-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
+; CHECK-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -1103,6 +1369,7 @@ define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) {
  ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
  ; CHECK-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
+; CHECK-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
  ; CHECK-NEXT:    ret float [[OP_EXTRA5]]
  ;
  ; THRESHOLD-LABEL: @extra_args(
@@ -1119,6 +1386,14 @@ define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) {
  ; THRESHOLD-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
  ; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
+; THRESHOLD-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
+; THRESHOLD-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
+; THRESHOLD-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD4]], [[CONV]]
+; THRESHOLD-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
+; THRESHOLD-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
+; THRESHOLD-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
+; THRESHOLD-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
+; THRESHOLD-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -1128,6 +1403,7 @@ define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) {
  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
  ; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
  ; THRESHOLD-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
+; THRESHOLD-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
  ; THRESHOLD-NEXT:    ret float [[OP_EXTRA5]]
  ;
    entry:
@@ -1176,6 +1452,16 @@ define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a
  ; CHECK-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
+; CHECK-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
+; CHECK-NEXT:    [[ADD41:%.*]] = fadd fast float [[ADD4]], 5.000000e+00
+; CHECK-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD41]], [[CONV]]
+; CHECK-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
+; CHECK-NEXT:    [[ADD4_11:%.*]] = fadd fast float [[ADD4_1]], 5.000000e+00
+; CHECK-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_11]]
+; CHECK-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
+; CHECK-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
+; CHECK-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -1187,6 +1473,7 @@ define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a
  ; CHECK-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], 5.000000e+00
  ; CHECK-NEXT:    [[OP_EXTRA6:%.*]] = fadd fast float [[OP_EXTRA5]], 5.000000e+00
  ; CHECK-NEXT:    [[OP_EXTRA7:%.*]] = fadd fast float [[OP_EXTRA6]], [[CONV]]
+; CHECK-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
  ; CHECK-NEXT:    ret float [[OP_EXTRA7]]
  ;
  ; THRESHOLD-LABEL: @extra_args_same_several_times(
@@ -1203,6 +1490,16 @@ define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a
  ; THRESHOLD-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
  ; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
+; THRESHOLD-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
+; THRESHOLD-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
+; THRESHOLD-NEXT:    [[ADD41:%.*]] = fadd fast float [[ADD4]], 5.000000e+00
+; THRESHOLD-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD41]], [[CONV]]
+; THRESHOLD-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
+; THRESHOLD-NEXT:    [[ADD4_11:%.*]] = fadd fast float [[ADD4_1]], 5.000000e+00
+; THRESHOLD-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_11]]
+; THRESHOLD-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
+; THRESHOLD-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
+; THRESHOLD-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -1214,6 +1511,7 @@ define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a
  ; THRESHOLD-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], 5.000000e+00
  ; THRESHOLD-NEXT:    [[OP_EXTRA6:%.*]] = fadd fast float [[OP_EXTRA5]], 5.000000e+00
  ; THRESHOLD-NEXT:    [[OP_EXTRA7:%.*]] = fadd fast float [[OP_EXTRA6]], [[CONV]]
+; THRESHOLD-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
  ; THRESHOLD-NEXT:    ret float [[OP_EXTRA7]]
  ;
    entry:
@@ -1266,6 +1564,14 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b
  ; CHECK-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
+; CHECK-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
+; CHECK-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD4]]
+; CHECK-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
+; CHECK-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
+; CHECK-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD4_3]], [[CONV]]
+; CHECK-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD5]]
+; CHECK-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -1275,6 +1581,7 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b
  ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
  ; CHECK-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
+; CHECK-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
  ; CHECK-NEXT:    ret float [[OP_EXTRA5]]
  ;
  ; THRESHOLD-LABEL: @extra_args_no_replace(
@@ -1293,6 +1600,14 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b
  ; THRESHOLD-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
  ; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
  ; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
+; THRESHOLD-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
+; THRESHOLD-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
+; THRESHOLD-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD4]]
+; THRESHOLD-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
+; THRESHOLD-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
+; THRESHOLD-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD4_3]], [[CONV]]
+; THRESHOLD-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD5]]
+; THRESHOLD-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -1302,6 +1617,7 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b
  ; THRESHOLD-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
  ; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
  ; THRESHOLD-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
+; THRESHOLD-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
  ; THRESHOLD-NEXT:    ret float [[OP_EXTRA5]]
  ;
    entry:
@@ -1352,6 +1668,10 @@ define i32 @wobble(i32 %arg, i32 %bar) {
  ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3
  ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer
  ; CHECK-NEXT:    [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32>
+; CHECK-NEXT:    [[R1:%.*]] = add nuw i32 [[ARG]], undef
+; CHECK-NEXT:    [[R2:%.*]] = add nsw i32 [[R1]], undef
+; CHECK-NEXT:    [[R3:%.*]] = add nsw i32 [[R2]], undef
+; CHECK-NEXT:    [[R4:%.*]] = add nsw i32 [[R3]], undef
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
@@ -1359,6 +1679,7 @@ define i32 @wobble(i32 %arg, i32 %bar) {
  ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]]
  ; CHECK-NEXT:    [[OP_EXTRA3:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]]
+; CHECK-NEXT:    [[R5:%.*]] = add nsw i32 [[R4]], [[TMP9]]
  ; CHECK-NEXT:    ret i32 [[OP_EXTRA3]]
  ;
  ; THRESHOLD-LABEL: @wobble(
@@ -1375,6 +1696,10 @@ define i32 @wobble(i32 %arg, i32 %bar) {
  ; THRESHOLD-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3
  ; THRESHOLD-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer
  ; THRESHOLD-NEXT:    [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32>
+; THRESHOLD-NEXT:    [[R1:%.*]] = add nuw i32 [[ARG]], undef
+; THRESHOLD-NEXT:    [[R2:%.*]] = add nsw i32 [[R1]], undef
+; THRESHOLD-NEXT:    [[R3:%.*]] = add nsw i32 [[R2]], undef
+; THRESHOLD-NEXT:    [[R4:%.*]] = add nsw i32 [[R3]], undef
  ; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]], [[RDX_SHUF]]
  ; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
@@ -1382,6 +1707,7 @@ define i32 @wobble(i32 %arg, i32 %bar) {
  ; THRESHOLD-NEXT:    [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
  ; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]]
  ; THRESHOLD-NEXT:    [[OP_EXTRA3:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]]
+; THRESHOLD-NEXT:    [[R5:%.*]] = add nsw i32 [[R4]], [[TMP9]]
  ; THRESHOLD-NEXT:    ret i32 [[OP_EXTRA3]]
  ;
    bb:
diff --git a/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll

index 8b0ae669ae795ca51620cd0d11d17047eda4e36f..8432b910d91931cc42d6315249bdababfa5875c0 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
+++ b/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
@@ -12,6 +12,19 @@
  define i32 @maxi8(i32) {
  ; CHECK-LABEL: @maxi8(
  ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i32 undef, undef
+; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef
+; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef
+; CHECK-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
+; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
+; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
+; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP2]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP2]], <8 x i32> [[RDX_SHUF]]
@@ -21,8 +34,9 @@ define i32 @maxi8(i32) {
  ; CHECK-NEXT:    [[RDX_SHUF4:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]]
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0
-; CHECK-NEXT:    ret i32 [[TMP3]]
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0
+; CHECK-NEXT:    [[TMP17:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef
+; CHECK-NEXT:    ret i32 [[TMP16]]
  ;
    %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
    %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
@@ -52,6 +66,35 @@ define i32 @maxi8(i32) {
  define i32 @maxi16(i32) {
  ; CHECK-LABEL: @maxi16(
  ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr to <16 x i32>*), align 16
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i32 undef, undef
+; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef
+; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef
+; CHECK-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
+; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
+; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
+; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef
+; CHECK-NEXT:    [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef
+; CHECK-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 undef
+; CHECK-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef
+; CHECK-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 undef
+; CHECK-NEXT:    [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef
+; CHECK-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef
+; CHECK-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef
+; CHECK-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef
+; CHECK-NEXT:    [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef
+; CHECK-NEXT:    [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef
+; CHECK-NEXT:    [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 undef
+; CHECK-NEXT:    [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef
+; CHECK-NEXT:    [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 undef
+; CHECK-NEXT:    [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <16 x i32> [[TMP2]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x i32> [[TMP2]], <16 x i32> [[RDX_SHUF]]
@@ -64,8 +107,9 @@ define i32 @maxi16(i32) {
  ; CHECK-NEXT:    [[RDX_SHUF7:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]]
  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> [[RDX_SHUF7]]
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0
-; CHECK-NEXT:    ret i32 [[TMP3]]
+; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0
+; CHECK-NEXT:    [[TMP33:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 undef
+; CHECK-NEXT:    ret i32 [[TMP32]]
  ;
    %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
    %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
@@ -119,6 +163,67 @@ define i32 @maxi16(i32) {
  define i32 @maxi32(i32) {
  ; CHECK-LABEL: @maxi32(
  ; CHECK-NEXT:    [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr to <32 x i32>*), align 16
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i32 undef, undef
+; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef
+; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef
+; CHECK-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
+; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
+; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
+; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef
+; CHECK-NEXT:    [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef
+; CHECK-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 undef
+; CHECK-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef
+; CHECK-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 undef
+; CHECK-NEXT:    [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef
+; CHECK-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef
+; CHECK-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef
+; CHECK-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef
+; CHECK-NEXT:    [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef
+; CHECK-NEXT:    [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef
+; CHECK-NEXT:    [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 undef
+; CHECK-NEXT:    [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef
+; CHECK-NEXT:    [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 undef
+; CHECK-NEXT:    [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef
+; CHECK-NEXT:    [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 undef
+; CHECK-NEXT:    [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], undef
+; CHECK-NEXT:    [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32 undef
+; CHECK-NEXT:    [[TMP35:%.*]] = icmp sgt i32 [[TMP34]], undef
+; CHECK-NEXT:    [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP34]], i32 undef
+; CHECK-NEXT:    [[TMP37:%.*]] = icmp sgt i32 [[TMP36]], undef
+; CHECK-NEXT:    [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[TMP36]], i32 undef
+; CHECK-NEXT:    [[TMP39:%.*]] = icmp sgt i32 [[TMP38]], undef
+; CHECK-NEXT:    [[TMP40:%.*]] = select i1 [[TMP39]], i32 [[TMP38]], i32 undef
+; CHECK-NEXT:    [[TMP41:%.*]] = icmp sgt i32 [[TMP40]], undef
+; CHECK-NEXT:    [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP40]], i32 undef
+; CHECK-NEXT:    [[TMP43:%.*]] = icmp sgt i32 [[TMP42]], undef
+; CHECK-NEXT:    [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP42]], i32 undef
+; CHECK-NEXT:    [[TMP45:%.*]] = icmp sgt i32 [[TMP44]], undef
+; CHECK-NEXT:    [[TMP46:%.*]] = select i1 [[TMP45]], i32 [[TMP44]], i32 undef
+; CHECK-NEXT:    [[TMP47:%.*]] = icmp sgt i32 [[TMP46]], undef
+; CHECK-NEXT:    [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP46]], i32 undef
+; CHECK-NEXT:    [[TMP49:%.*]] = icmp sgt i32 [[TMP48]], undef
+; CHECK-NEXT:    [[TMP50:%.*]] = select i1 [[TMP49]], i32 [[TMP48]], i32 undef
+; CHECK-NEXT:    [[TMP51:%.*]] = icmp sgt i32 [[TMP50]], undef
+; CHECK-NEXT:    [[TMP52:%.*]] = select i1 [[TMP51]], i32 [[TMP50]], i32 undef
+; CHECK-NEXT:    [[TMP53:%.*]] = icmp sgt i32 [[TMP52]], undef
+; CHECK-NEXT:    [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP52]], i32 undef
+; CHECK-NEXT:    [[TMP55:%.*]] = icmp sgt i32 [[TMP54]], undef
+; CHECK-NEXT:    [[TMP56:%.*]] = select i1 [[TMP55]], i32 [[TMP54]], i32 undef
+; CHECK-NEXT:    [[TMP57:%.*]] = icmp sgt i32 [[TMP56]], undef
+; CHECK-NEXT:    [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32 undef
+; CHECK-NEXT:    [[TMP59:%.*]] = icmp sgt i32 [[TMP58]], undef
+; CHECK-NEXT:    [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP58]], i32 undef
+; CHECK-NEXT:    [[TMP61:%.*]] = icmp sgt i32 [[TMP60]], undef
+; CHECK-NEXT:    [[TMP62:%.*]] = select i1 [[TMP61]], i32 [[TMP60]], i32 undef
+; CHECK-NEXT:    [[TMP63:%.*]] = icmp sgt i32 [[TMP62]], undef
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP2]], <32 x i32> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <32 x i32> [[TMP2]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x i32> [[TMP2]], <32 x i32> [[RDX_SHUF]]
@@ -134,8 +239,9 @@ define i32 @maxi32(i32) {
  ; CHECK-NEXT:    [[RDX_SHUF10:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[RDX_MINMAX_CMP11:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]]
  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> [[RDX_SHUF10]]
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0
-; CHECK-NEXT:    ret i32 [[TMP3]]
+; CHECK-NEXT:    [[TMP64:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0
+; CHECK-NEXT:    [[TMP65:%.*]] = select i1 [[TMP63]], i32 [[TMP62]], i32 undef
+; CHECK-NEXT:    ret i32 [[TMP64]]
  ;
    %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
    %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
@@ -237,6 +343,19 @@ define i32 @maxi32(i32) {
  define float @maxf8(float) {
  ; CHECK-LABEL: @maxf8(
  ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr1 to <8 x float>*), align 16
+; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt float undef, undef
+; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef
+; CHECK-NEXT:    [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef
+; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef
+; CHECK-NEXT:    [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef
+; CHECK-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef
+; CHECK-NEXT:    [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef
+; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef
+; CHECK-NEXT:    [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef
+; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef
+; CHECK-NEXT:    [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef
+; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef
+; CHECK-NEXT:    [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <8 x float> [[TMP2]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]]
@@ -246,8 +365,9 @@ define float @maxf8(float) {
  ; CHECK-NEXT:    [[RDX_SHUF4:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> [[RDX_SHUF4]]
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0
-; CHECK-NEXT:    ret float [[TMP3]]
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0
+; CHECK-NEXT:    [[TMP17:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef
+; CHECK-NEXT:    ret float [[TMP16]]
  ;
    %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
    %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
@@ -277,6 +397,35 @@ define float @maxf8(float) {
  define float @maxf16(float) {
  ; CHECK-LABEL: @maxf16(
  ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr1 to <16 x float>*), align 16
+; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt float undef, undef
+; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef
+; CHECK-NEXT:    [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef
+; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef
+; CHECK-NEXT:    [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef
+; CHECK-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef
+; CHECK-NEXT:    [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef
+; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef
+; CHECK-NEXT:    [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef
+; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef
+; CHECK-NEXT:    [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef
+; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef
+; CHECK-NEXT:    [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef
+; CHECK-NEXT:    [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef
+; CHECK-NEXT:    [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef
+; CHECK-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]], float undef
+; CHECK-NEXT:    [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef
+; CHECK-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]], float undef
+; CHECK-NEXT:    [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef
+; CHECK-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]], float undef
+; CHECK-NEXT:    [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef
+; CHECK-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]], float undef
+; CHECK-NEXT:    [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef
+; CHECK-NEXT:    [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]], float undef
+; CHECK-NEXT:    [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef
+; CHECK-NEXT:    [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]], float undef
+; CHECK-NEXT:    [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef
+; CHECK-NEXT:    [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]], float undef
+; CHECK-NEXT:    [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <16 x float> [[TMP2]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x float> [[TMP2]], <16 x float> [[RDX_SHUF]]
@@ -289,8 +438,9 @@ define float @maxf16(float) {
  ; CHECK-NEXT:    [[RDX_SHUF7:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]]
  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> [[RDX_SHUF7]]
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0
-; CHECK-NEXT:    ret float [[TMP3]]
+; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0
+; CHECK-NEXT:    [[TMP33:%.*]] = select i1 [[TMP31]], float [[TMP30]], float undef
+; CHECK-NEXT:    ret float [[TMP32]]
  ;
    %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
    %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
@@ -344,6 +494,67 @@ define float @maxf16(float) {
  define float @maxf32(float) {
  ; CHECK-LABEL: @maxf32(
  ; CHECK-NEXT:    [[TMP2:%.*]] = load <32 x float>, <32 x float>* bitcast ([32 x float]* @arr1 to <32 x float>*), align 16
+; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt float undef, undef
+; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef
+; CHECK-NEXT:    [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef
+; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef
+; CHECK-NEXT:    [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef
+; CHECK-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef
+; CHECK-NEXT:    [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef
+; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef
+; CHECK-NEXT:    [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef
+; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef
+; CHECK-NEXT:    [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef
+; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef
+; CHECK-NEXT:    [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef
+; CHECK-NEXT:    [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef
+; CHECK-NEXT:    [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef
+; CHECK-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]], float undef
+; CHECK-NEXT:    [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef
+; CHECK-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]], float undef
+; CHECK-NEXT:    [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef
+; CHECK-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]], float undef
+; CHECK-NEXT:    [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef
+; CHECK-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]], float undef
+; CHECK-NEXT:    [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef
+; CHECK-NEXT:    [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]], float undef
+; CHECK-NEXT:    [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef
+; CHECK-NEXT:    [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]], float undef
+; CHECK-NEXT:    [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef
+; CHECK-NEXT:    [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]], float undef
+; CHECK-NEXT:    [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef
+; CHECK-NEXT:    [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP30]], float undef
+; CHECK-NEXT:    [[TMP33:%.*]] = fcmp fast ogt float [[TMP32]], undef
+; CHECK-NEXT:    [[TMP34:%.*]] = select i1 [[TMP33]], float [[TMP32]], float undef
+; CHECK-NEXT:    [[TMP35:%.*]] = fcmp fast ogt float [[TMP34]], undef
+; CHECK-NEXT:    [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP34]], float undef
+; CHECK-NEXT:    [[TMP37:%.*]] = fcmp fast ogt float [[TMP36]], undef
+; CHECK-NEXT:    [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP36]], float undef
+; CHECK-NEXT:    [[TMP39:%.*]] = fcmp fast ogt float [[TMP38]], undef
+; CHECK-NEXT:    [[TMP40:%.*]] = select i1 [[TMP39]], float [[TMP38]], float undef
+; CHECK-NEXT:    [[TMP41:%.*]] = fcmp fast ogt float [[TMP40]], undef
+; CHECK-NEXT:    [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP40]], float undef
+; CHECK-NEXT:    [[TMP43:%.*]] = fcmp fast ogt float [[TMP42]], undef
+; CHECK-NEXT:    [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP42]], float undef
+; CHECK-NEXT:    [[TMP45:%.*]] = fcmp fast ogt float [[TMP44]], undef
+; CHECK-NEXT:    [[TMP46:%.*]] = select i1 [[TMP45]], float [[TMP44]], float undef
+; CHECK-NEXT:    [[TMP47:%.*]] = fcmp fast ogt float [[TMP46]], undef
+; CHECK-NEXT:    [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP46]], float undef
+; CHECK-NEXT:    [[TMP49:%.*]] = fcmp fast ogt float [[TMP48]], undef
+; CHECK-NEXT:    [[TMP50:%.*]] = select i1 [[TMP49]], float [[TMP48]], float undef
+; CHECK-NEXT:    [[TMP51:%.*]] = fcmp fast ogt float [[TMP50]], undef
+; CHECK-NEXT:    [[TMP52:%.*]] = select i1 [[TMP51]], float [[TMP50]], float undef
+; CHECK-NEXT:    [[TMP53:%.*]] = fcmp fast ogt float [[TMP52]], undef
+; CHECK-NEXT:    [[TMP54:%.*]] = select i1 [[TMP53]], float [[TMP52]], float undef
+; CHECK-NEXT:    [[TMP55:%.*]] = fcmp fast ogt float [[TMP54]], undef
+; CHECK-NEXT:    [[TMP56:%.*]] = select i1 [[TMP55]], float [[TMP54]], float undef
+; CHECK-NEXT:    [[TMP57:%.*]] = fcmp fast ogt float [[TMP56]], undef
+; CHECK-NEXT:    [[TMP58:%.*]] = select i1 [[TMP57]], float [[TMP56]], float undef
+; CHECK-NEXT:    [[TMP59:%.*]] = fcmp fast ogt float [[TMP58]], undef
+; CHECK-NEXT:    [[TMP60:%.*]] = select i1 [[TMP59]], float [[TMP58]], float undef
+; CHECK-NEXT:    [[TMP61:%.*]] = fcmp fast ogt float [[TMP60]], undef
+; CHECK-NEXT:    [[TMP62:%.*]] = select i1 [[TMP61]], float [[TMP60]], float undef
+; CHECK-NEXT:    [[TMP63:%.*]] = fcmp fast ogt float [[TMP62]], undef
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP2]], <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <32 x float> [[TMP2]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x float> [[TMP2]], <32 x float> [[RDX_SHUF]]
@@ -359,8 +570,9 @@ define float @maxf32(float) {
  ; CHECK-NEXT:    [[RDX_SHUF10:%.*]] = shufflevector <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[RDX_MINMAX_CMP11:%.*]] = fcmp fast ogt <32 x float> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]]
  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float> [[RDX_SHUF10]]
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <32 x float> [[RDX_MINMAX_SELECT12]], i32 0
-; CHECK-NEXT:    ret float [[TMP3]]
+; CHECK-NEXT:    [[TMP64:%.*]] = extractelement <32 x float> [[RDX_MINMAX_SELECT12]], i32 0
+; CHECK-NEXT:    [[TMP65:%.*]] = select i1 [[TMP63]], float [[TMP62]], float undef
+; CHECK-NEXT:    ret float [[TMP64]]
  ;
    %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
    %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
@@ -466,24 +678,34 @@ define i32 @maxi8_mutiple_uses(i32) {
  ; SSE-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
  ; SSE-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
  ; SSE-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
-; SSE-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
+; SSE-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
+; SSE-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
+; SSE-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
+; SSE-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef
+; SSE-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
+; SSE-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef
+; SSE-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
+; SSE-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef
+; SSE-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
+; SSE-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
  ; SSE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; SSE-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]]
  ; SSE-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
  ; SSE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ; SSE-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
  ; SSE-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]]
-; SSE-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
-; SSE-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]]
-; SSE-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP7]]
-; SSE-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]]
-; SSE-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 [[TMP5]]
-; SSE-NEXT:    [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
-; SSE-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP12]]
-; SSE-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA]], i32 [[TMP12]]
-; SSE-NEXT:    [[TMP15:%.*]] = select i1 [[TMP4]], i32 3, i32 4
-; SSE-NEXT:    store i32 [[TMP15]], i32* @var, align 8
-; SSE-NEXT:    ret i32 [[TMP14]]
+; SSE-NEXT:    [[TMP17:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
+; SSE-NEXT:    [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], [[TMP15]]
+; SSE-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP17]], i32 [[TMP15]]
+; SSE-NEXT:    [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], [[TMP5]]
+; SSE-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP20]], i32 [[TMP19]], i32 [[TMP5]]
+; SSE-NEXT:    [[TMP21:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
+; SSE-NEXT:    [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+; SSE-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP22]]
+; SSE-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[OP_EXTRA]], i32 [[TMP22]]
+; SSE-NEXT:    [[TMP25:%.*]] = select i1 [[TMP4]], i32 3, i32 4
+; SSE-NEXT:    store i32 [[TMP25]], i32* @var, align 8
+; SSE-NEXT:    ret i32 [[TMP24]]
  ;
  ; AVX-LABEL: @maxi8_mutiple_uses(
  ; AVX-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
@@ -491,24 +713,34 @@ define i32 @maxi8_mutiple_uses(i32) {
  ; AVX-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
  ; AVX-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
  ; AVX-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
-; AVX-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
+; AVX-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
+; AVX-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
+; AVX-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
+; AVX-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef
+; AVX-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
+; AVX-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef
+; AVX-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
+; AVX-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef
+; AVX-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
+; AVX-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
  ; AVX-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; AVX-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]]
  ; AVX-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
  ; AVX-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ; AVX-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
  ; AVX-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]]
-; AVX-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
-; AVX-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]]
-; AVX-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP7]]
-; AVX-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]]
-; AVX-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 [[TMP5]]
-; AVX-NEXT:    [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
-; AVX-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP12]]
-; AVX-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA]], i32 [[TMP12]]
-; AVX-NEXT:    [[TMP15:%.*]] = select i1 [[TMP4]], i32 3, i32 4
-; AVX-NEXT:    store i32 [[TMP15]], i32* @var, align 8
-; AVX-NEXT:    ret i32 [[TMP14]]
+; AVX-NEXT:    [[TMP17:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
+; AVX-NEXT:    [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], [[TMP15]]
+; AVX-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP17]], i32 [[TMP15]]
+; AVX-NEXT:    [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], [[TMP5]]
+; AVX-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP20]], i32 [[TMP19]], i32 [[TMP5]]
+; AVX-NEXT:    [[TMP21:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
+; AVX-NEXT:    [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+; AVX-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP22]]
+; AVX-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[OP_EXTRA]], i32 [[TMP22]]
+; AVX-NEXT:    [[TMP25:%.*]] = select i1 [[TMP4]], i32 3, i32 4
+; AVX-NEXT:    store i32 [[TMP25]], i32* @var, align 8
+; AVX-NEXT:    ret i32 [[TMP24]]
  ;
  ; AVX2-LABEL: @maxi8_mutiple_uses(
  ; AVX2-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
@@ -516,24 +748,34 @@ define i32 @maxi8_mutiple_uses(i32) {
  ; AVX2-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
  ; AVX2-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
  ; AVX2-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
-; AVX2-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
+; AVX2-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
+; AVX2-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
+; AVX2-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
+; AVX2-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef
+; AVX2-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
+; AVX2-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef
+; AVX2-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
+; AVX2-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef
+; AVX2-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
+; AVX2-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
  ; AVX2-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; AVX2-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]]
  ; AVX2-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
  ; AVX2-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ; AVX2-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
  ; AVX2-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]]
-; AVX2-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
-; AVX2-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]]
-; AVX2-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP7]]
-; AVX2-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]]
-; AVX2-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 [[TMP5]]
-; AVX2-NEXT:    [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
-; AVX2-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP12]]
-; AVX2-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA]], i32 [[TMP12]]
-; AVX2-NEXT:    [[TMP15:%.*]] = select i1 [[TMP4]], i32 3, i32 4
-; AVX2-NEXT:    store i32 [[TMP15]], i32* @var, align 8
-; AVX2-NEXT:    ret i32 [[TMP14]]
+; AVX2-NEXT:    [[TMP17:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
+; AVX2-NEXT:    [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], [[TMP15]]
+; AVX2-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP17]], i32 [[TMP15]]
+; AVX2-NEXT:    [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], [[TMP5]]
+; AVX2-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP20]], i32 [[TMP19]], i32 [[TMP5]]
+; AVX2-NEXT:    [[TMP21:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
+; AVX2-NEXT:    [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+; AVX2-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP22]]
+; AVX2-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[OP_EXTRA]], i32 [[TMP22]]
+; AVX2-NEXT:    [[TMP25:%.*]] = select i1 [[TMP4]], i32 3, i32 4
+; AVX2-NEXT:    store i32 [[TMP25]], i32* @var, align 8
+; AVX2-NEXT:    ret i32 [[TMP24]]
  ;
  ; SKX-LABEL: @maxi8_mutiple_uses(
  ; SKX-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([32 x i32]* @arr to <2 x i32>*), align 16
@@ -555,16 +797,26 @@ define i32 @maxi8_mutiple_uses(i32) {
  ; SKX-NEXT:    [[TMP12:%.*]] = icmp sgt <2 x i32> [[TMP9]], [[TMP11]]
  ; SKX-NEXT:    [[TMP13:%.*]] = select <2 x i1> [[TMP12]], <2 x i32> [[TMP9]], <2 x i32> [[TMP11]]
  ; SKX-NEXT:    [[TMP14:%.*]] = extractelement <2 x i32> [[TMP13]], i32 1
-; SKX-NEXT:    [[TMP15:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0
-; SKX-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP15]], [[TMP14]]
-; SKX-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP16]], i32 [[TMP15]], i32 [[TMP14]]
-; SKX-NEXT:    [[TMP17:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
-; SKX-NEXT:    [[TMP18:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP17]]
-; SKX-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[OP_EXTRA]], i32 [[TMP17]]
-; SKX-NEXT:    [[TMP20:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1
-; SKX-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], i32 3, i32 4
-; SKX-NEXT:    store i32 [[TMP21]], i32* @var, align 8
-; SKX-NEXT:    ret i32 [[TMP19]]
+; SKX-NEXT:    [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef
+; SKX-NEXT:    [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef
+; SKX-NEXT:    [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef
+; SKX-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 undef
+; SKX-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef
+; SKX-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 undef
+; SKX-NEXT:    [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef
+; SKX-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef
+; SKX-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP6]]
+; SKX-NEXT:    [[TMP24:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0
+; SKX-NEXT:    [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP14]]
+; SKX-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP14]]
+; SKX-NEXT:    [[TMP26:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 [[TMP6]]
+; SKX-NEXT:    [[TMP27:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+; SKX-NEXT:    [[TMP28:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP27]]
+; SKX-NEXT:    [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[OP_EXTRA]], i32 [[TMP27]]
+; SKX-NEXT:    [[TMP30:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1
+; SKX-NEXT:    [[TMP31:%.*]] = select i1 [[TMP30]], i32 3, i32 4
+; SKX-NEXT:    store i32 [[TMP31]], i32* @var, align 8
+; SKX-NEXT:    ret i32 [[TMP29]]
  ;
    %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
    %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
@@ -602,21 +854,33 @@ define i32 @maxi8_wrong_parent(i32) {
  ; SSE:       pp:
  ; SSE-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
  ; SSE-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
-; SSE-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
-; SSE-NEXT:    [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
+; SSE-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
+; SSE-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
+; SSE-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef
+; SSE-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
+; SSE-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef
+; SSE-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
+; SSE-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef
+; SSE-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
+; SSE-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
+; SSE-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
+; SSE-NEXT:    [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+; SSE-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
  ; SSE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; SSE-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]]
  ; SSE-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
  ; SSE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ; SSE-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
  ; SSE-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]]
-; SSE-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
-; SSE-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
-; SSE-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]]
-; SSE-NEXT:    [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
-; SSE-NEXT:    [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]]
-; SSE-NEXT:    [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]]
-; SSE-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 [[TMP5]]
+; SSE-NEXT:    [[TMP20:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
+; SSE-NEXT:    [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], [[TMP15]]
+; SSE-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 [[TMP15]]
+; SSE-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP18]]
+; SSE-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 [[TMP18]]
+; SSE-NEXT:    [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP5]]
+; SSE-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP5]]
+; SSE-NEXT:    [[TMP26:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
  ; SSE-NEXT:    ret i32 [[OP_EXTRA]]
  ;
  ; AVX-LABEL: @maxi8_wrong_parent(
@@ -627,21 +891,33 @@ define i32 @maxi8_wrong_parent(i32) {
  ; AVX:       pp:
  ; AVX-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
  ; AVX-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
-; AVX-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
-; AVX-NEXT:    [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+; AVX-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
+; AVX-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
+; AVX-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
+; AVX-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef
+; AVX-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
+; AVX-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef
+; AVX-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
+; AVX-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef
+; AVX-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
+; AVX-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
+; AVX-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
+; AVX-NEXT:    [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+; AVX-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
  ; AVX-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; AVX-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]]
  ; AVX-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
  ; AVX-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ; AVX-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
  ; AVX-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]]
-; AVX-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
-; AVX-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
-; AVX-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]]
-; AVX-NEXT:    [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
-; AVX-NEXT:    [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]]
-; AVX-NEXT:    [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]]
-; AVX-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 [[TMP5]]
+; AVX-NEXT:    [[TMP20:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
+; AVX-NEXT:    [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], [[TMP15]]
+; AVX-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 [[TMP15]]
+; AVX-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP18]]
+; AVX-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 [[TMP18]]
+; AVX-NEXT:    [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP5]]
+; AVX-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP5]]
+; AVX-NEXT:    [[TMP26:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
  ; AVX-NEXT:    ret i32 [[OP_EXTRA]]
  ;
  ; AVX2-LABEL: @maxi8_wrong_parent(
@@ -652,21 +928,33 @@ define i32 @maxi8_wrong_parent(i32) {
  ; AVX2:       pp:
  ; AVX2-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
  ; AVX2-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
-; AVX2-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
-; AVX2-NEXT:    [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+; AVX2-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
+; AVX2-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
+; AVX2-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
+; AVX2-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef
+; AVX2-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
+; AVX2-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef
+; AVX2-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
+; AVX2-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef
+; AVX2-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
+; AVX2-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
+; AVX2-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
+; AVX2-NEXT:    [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+; AVX2-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
  ; AVX2-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; AVX2-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]]
  ; AVX2-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
  ; AVX2-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ; AVX2-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
  ; AVX2-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]]
-; AVX2-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
-; AVX2-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
-; AVX2-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]]
-; AVX2-NEXT:    [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
-; AVX2-NEXT:    [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]]
-; AVX2-NEXT:    [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]]
-; AVX2-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 [[TMP5]]
+; AVX2-NEXT:    [[TMP20:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
+; AVX2-NEXT:    [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], [[TMP15]]
+; AVX2-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 [[TMP15]]
+; AVX2-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP18]]
+; AVX2-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 [[TMP18]]
+; AVX2-NEXT:    [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP5]]
+; AVX2-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP5]]
+; AVX2-NEXT:    [[TMP26:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
  ; AVX2-NEXT:    ret i32 [[OP_EXTRA]]
  ;
  ; SKX-LABEL: @maxi8_wrong_parent(
@@ -697,9 +985,21 @@ define i32 @maxi8_wrong_parent(i32) {
  ; SKX-NEXT:    [[TMP18:%.*]] = insertelement <2 x i32> [[TMP17]], i32 [[TMP4]], i32 1
  ; SKX-NEXT:    [[TMP19:%.*]] = select <2 x i1> [[TMP14]], <2 x i32> [[TMP16]], <2 x i32> [[TMP18]]
  ; SKX-NEXT:    [[TMP20:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1
-; SKX-NEXT:    [[TMP21:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0
-; SKX-NEXT:    [[TMP22:%.*]] = icmp sgt i32 [[TMP21]], [[TMP20]]
-; SKX-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP22]], i32 [[TMP21]], i32 [[TMP20]]
+; SKX-NEXT:    [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef
+; SKX-NEXT:    [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef
+; SKX-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef
+; SKX-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef
+; SKX-NEXT:    [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef
+; SKX-NEXT:    [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef
+; SKX-NEXT:    [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef
+; SKX-NEXT:    [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 undef
+; SKX-NEXT:    [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], [[TMP7]]
+; SKX-NEXT:    [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 [[TMP7]]
+; SKX-NEXT:    [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], [[TMP8]]
+; SKX-NEXT:    [[TMP32:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0
+; SKX-NEXT:    [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], [[TMP20]]
+; SKX-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32 [[TMP20]]
+; SKX-NEXT:    [[TMP34:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 [[TMP8]]
  ; SKX-NEXT:    ret i32 [[OP_EXTRA]]
  ;
    %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
diff --git a/test/Transforms/SLPVectorizer/X86/horizontal.ll b/test/Transforms/SLPVectorizer/X86/horizontal.ll

index ed0e5784e30260b3a934c5b8818acf5fbcdef0b3..311d8a476c71db861710b5dbc497a16ce3b05965 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/horizontal.ll
+++ b/test/Transforms/SLPVectorizer/X86/horizontal.ll
@@ -37,11 +37,14 @@ define i32 @add_red(float* %A, i32 %n) {
  ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>*
  ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
  ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], <float 7.000000e+00, float 7.000000e+00, float 7.000000e+00, float 7.000000e+00>
+; CHECK-NEXT:    [[ADD6:%.*]] = fadd fast float undef, undef
+; CHECK-NEXT:    [[ADD11:%.*]] = fadd fast float [[ADD6]], undef
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
  ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
+; CHECK-NEXT:    [[ADD16:%.*]] = fadd fast float [[ADD11]], undef
  ; CHECK-NEXT:    [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]]
  ; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_033]], 1
  ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]]
@@ -74,11 +77,14 @@ define i32 @add_red(float* %A, i32 %n) {
  ; STORE-NEXT:    [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>*
  ; STORE-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
  ; STORE-NEXT:    [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], <float 7.000000e+00, float 7.000000e+00, float 7.000000e+00, float 7.000000e+00>
+; STORE-NEXT:    [[ADD6:%.*]] = fadd fast float undef, undef
+; STORE-NEXT:    [[ADD11:%.*]] = fadd fast float [[ADD6]], undef
  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; STORE-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ; STORE-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
  ; STORE-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
+; STORE-NEXT:    [[ADD16:%.*]] = fadd fast float [[ADD11]], undef
  ; STORE-NEXT:    [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]]
  ; STORE-NEXT:    [[INC]] = add nsw i64 [[I_033]], 1
  ; STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]]
@@ -172,11 +178,14 @@ define i32 @mul_red(float* noalias %A, float* noalias %B, i32 %n) {
  ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
  ; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
  ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[ADD8:%.*]] = fadd fast float undef, undef
+; CHECK-NEXT:    [[ADD14:%.*]] = fadd fast float [[ADD8]], undef
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
  ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
+; CHECK-NEXT:    [[ADD20:%.*]] = fadd fast float [[ADD14]], undef
  ; CHECK-NEXT:    [[MUL21]] = fmul float [[SUM_039]], [[TMP6]]
  ; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_040]], 1
  ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
@@ -214,11 +223,14 @@ define i32 @mul_red(float* noalias %A, float* noalias %B, i32 %n) {
  ; STORE-NEXT:    [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
  ; STORE-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
  ; STORE-NEXT:    [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]]
+; STORE-NEXT:    [[ADD8:%.*]] = fadd fast float undef, undef
+; STORE-NEXT:    [[ADD14:%.*]] = fadd fast float [[ADD8]], undef
  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; STORE-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]], [[RDX_SHUF]]
  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ; STORE-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
  ; STORE-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
+; STORE-NEXT:    [[ADD20:%.*]] = fadd fast float [[ADD14]], undef
  ; STORE-NEXT:    [[MUL21]] = fmul float [[SUM_039]], [[TMP6]]
  ; STORE-NEXT:    [[INC]] = add nsw i64 [[I_040]], 1
  ; STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
@@ -338,6 +350,13 @@ define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) {
  ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x float>*
  ; CHECK-NEXT:    [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4
  ; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]]
+; CHECK-NEXT:    [[ADD8:%.*]] = fadd fast float undef, undef
+; CHECK-NEXT:    [[ADD14:%.*]] = fadd fast float [[ADD8]], undef
+; CHECK-NEXT:    [[ADD20:%.*]] = fadd fast float [[ADD14]], undef
+; CHECK-NEXT:    [[ADD26:%.*]] = fadd fast float [[ADD20]], undef
+; CHECK-NEXT:    [[ADD32:%.*]] = fadd fast float [[ADD26]], undef
+; CHECK-NEXT:    [[ADD38:%.*]] = fadd fast float [[ADD32]], undef
+; CHECK-NEXT:    [[ADD44:%.*]] = fadd fast float [[ADD38]], undef
  ; CHECK-NEXT:    [[ADD47:%.*]] = add nsw i64 [[MUL]], 8
  ; CHECK-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD47]]
  ; CHECK-NEXT:    [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4
@@ -350,6 +369,7 @@ define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) {
  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
  ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
  ; CHECK-NEXT:    [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]]
+; CHECK-NEXT:    [[ADD50:%.*]] = fadd fast float [[ADD44]], [[MUL49]]
  ; CHECK-NEXT:    [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]]
  ; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_083]], 1
  ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]]
@@ -401,6 +421,13 @@ define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) {
  ; STORE-NEXT:    [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x float>*
  ; STORE-NEXT:    [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4
  ; STORE-NEXT:    [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]]
+; STORE-NEXT:    [[ADD8:%.*]] = fadd fast float undef, undef
+; STORE-NEXT:    [[ADD14:%.*]] = fadd fast float [[ADD8]], undef
+; STORE-NEXT:    [[ADD20:%.*]] = fadd fast float [[ADD14]], undef
+; STORE-NEXT:    [[ADD26:%.*]] = fadd fast float [[ADD20]], undef
+; STORE-NEXT:    [[ADD32:%.*]] = fadd fast float [[ADD26]], undef
+; STORE-NEXT:    [[ADD38:%.*]] = fadd fast float [[ADD32]], undef
+; STORE-NEXT:    [[ADD44:%.*]] = fadd fast float [[ADD38]], undef
  ; STORE-NEXT:    [[ADD47:%.*]] = add nsw i64 [[MUL]], 8
  ; STORE-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD47]]
  ; STORE-NEXT:    [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4
@@ -413,6 +440,7 @@ define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) {
  ; STORE-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
  ; STORE-NEXT:    [[TMP8:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
  ; STORE-NEXT:    [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]]
+; STORE-NEXT:    [[ADD50:%.*]] = fadd fast float [[ADD44]], [[MUL49]]
  ; STORE-NEXT:    [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]]
  ; STORE-NEXT:    [[INC]] = add nsw i64 [[I_083]], 1
  ; STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]]
@@ -548,12 +576,16 @@ define i32 @chain_red(float* noalias %A, float* noalias %B, i32 %n) {
  ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
  ; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
  ; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[SUM_042]], undef
+; CHECK-NEXT:    [[ADD9:%.*]] = fadd fast float [[ADD]], undef
+; CHECK-NEXT:    [[ADD15:%.*]] = fadd fast float [[ADD9]], undef
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
  ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
  ; CHECK-NEXT:    [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]]
+; CHECK-NEXT:    [[ADD21:%.*]] = fadd fast float [[ADD15]], undef
  ; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_043]], 1
  ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
  ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
@@ -590,12 +622,16 @@ define i32 @chain_red(float* noalias %A, float* noalias %B, i32 %n) {
  ; STORE-NEXT:    [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
  ; STORE-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
  ; STORE-NEXT:    [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]]
+; STORE-NEXT:    [[ADD:%.*]] = fadd fast float [[SUM_042]], undef
+; STORE-NEXT:    [[ADD9:%.*]] = fadd fast float [[ADD]], undef
+; STORE-NEXT:    [[ADD15:%.*]] = fadd fast float [[ADD9]], undef
  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; STORE-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]], [[RDX_SHUF]]
  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ; STORE-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
  ; STORE-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
  ; STORE-NEXT:    [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]]
+; STORE-NEXT:    [[ADD21:%.*]] = fadd fast float [[ADD15]], undef
  ; STORE-NEXT:    [[INC]] = add nsw i64 [[I_043]], 1
  ; STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
  ; STORE-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
@@ -1051,11 +1087,14 @@ define i32 @store_red(float* noalias %A, float* noalias %B, float* noalias %C, i
  ; STORE-NEXT:    [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
  ; STORE-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
  ; STORE-NEXT:    [[TMP5:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP4]]
+; STORE-NEXT:    [[ADD8:%.*]] = fadd fast float undef, undef
+; STORE-NEXT:    [[ADD14:%.*]] = fadd fast float [[ADD8]], undef
  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; STORE-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]], [[RDX_SHUF]]
  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ; STORE-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
  ; STORE-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
+; STORE-NEXT:    [[ADD20:%.*]] = fadd fast float [[ADD14]], undef
  ; STORE-NEXT:    store float [[TMP6]], float* [[C_ADDR_038]], align 4
  ; STORE-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds float, float* [[C_ADDR_038]], i64 1
  ; STORE-NEXT:    [[INC]] = add nsw i64 [[I_039]], 1
@@ -1130,11 +1169,14 @@ define void @float_red_example4(float* %res) {
  ; STORE-LABEL: @float_red_example4(
  ; STORE-NEXT:  entry:
  ; STORE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([32 x float]* @arr_float to <4 x float>*), align 16
+; STORE-NEXT:    [[ADD:%.*]] = fadd fast float undef, undef
+; STORE-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; STORE-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP0]], [[RDX_SHUF]]
  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ; STORE-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
+; STORE-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
  ; STORE-NEXT:    store float [[TMP1]], float* [[RES:%.*]], align 16
  ; STORE-NEXT:    ret void
  ;
@@ -1174,6 +1216,12 @@ define void @float_red_example8(float* %res) {
  ; STORE-LABEL: @float_red_example8(
  ; STORE-NEXT:  entry:
  ; STORE-NEXT:    [[TMP0:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr_float to <8 x float>*), align 16
+; STORE-NEXT:    [[ADD:%.*]] = fadd fast float undef, undef
+; STORE-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
+; STORE-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
+; STORE-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
+; STORE-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
+; STORE-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; STORE-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP0]], [[RDX_SHUF]]
  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -1181,6 +1229,7 @@ define void @float_red_example8(float* %res) {
  ; STORE-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; STORE-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
+; STORE-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
  ; STORE-NEXT:    store float [[TMP1]], float* [[RES:%.*]], align 16
  ; STORE-NEXT:    ret void
  ;
@@ -1244,6 +1293,20 @@ define void @float_red_example16(float* %res) {
  ; STORE-LABEL: @float_red_example16(
  ; STORE-NEXT:  entry:
  ; STORE-NEXT:    [[TMP0:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr_float to <16 x float>*), align 16
+; STORE-NEXT:    [[ADD:%.*]] = fadd fast float undef, undef
+; STORE-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
+; STORE-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
+; STORE-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
+; STORE-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
+; STORE-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
+; STORE-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
+; STORE-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
+; STORE-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
+; STORE-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
+; STORE-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
+; STORE-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
+; STORE-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
+; STORE-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; STORE-NEXT:    [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP0]], [[RDX_SHUF]]
  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[BIN_RDX]], <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -1253,6 +1316,7 @@ define void @float_red_example16(float* %res) {
  ; STORE-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <16 x float> [[BIN_RDX4]], <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; STORE-NEXT:    [[BIN_RDX6:%.*]] = fadd fast <16 x float> [[BIN_RDX4]], [[RDX_SHUF5]]
  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <16 x float> [[BIN_RDX6]], i32 0
+; STORE-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
  ; STORE-NEXT:    store float [[TMP1]], float* [[RES:%.*]], align 16
  ; STORE-NEXT:    ret void
  ;
@@ -1308,11 +1372,14 @@ define void @i32_red_example4(i32* %res) {
  ; STORE-LABEL: @i32_red_example4(
  ; STORE-NEXT:  entry:
  ; STORE-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr_i32 to <4 x i32>*), align 16
+; STORE-NEXT:    [[ADD:%.*]] = add nsw i32 undef, undef
+; STORE-NEXT:    [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; STORE-NEXT:    [[BIN_RDX:%.*]] = add nsw <4 x i32> [[TMP0]], [[RDX_SHUF]]
  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ; STORE-NEXT:    [[BIN_RDX2:%.*]] = add nsw <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
+; STORE-NEXT:    [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
  ; STORE-NEXT:    store i32 [[TMP1]], i32* [[RES:%.*]], align 16
  ; STORE-NEXT:    ret void
  ;
@@ -1352,6 +1419,12 @@ define void @i32_red_example8(i32* %res) {
  ; STORE-LABEL: @i32_red_example8(
  ; STORE-NEXT:  entry:
  ; STORE-NEXT:    [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
+; STORE-NEXT:    [[ADD:%.*]] = add nsw i32 undef, undef
+; STORE-NEXT:    [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
+; STORE-NEXT:    [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
+; STORE-NEXT:    [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
+; STORE-NEXT:    [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
+; STORE-NEXT:    [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; STORE-NEXT:    [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]]
  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -1359,6 +1432,7 @@ define void @i32_red_example8(i32* %res) {
  ; STORE-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; STORE-NEXT:    [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
+; STORE-NEXT:    [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
  ; STORE-NEXT:    store i32 [[TMP1]], i32* [[RES:%.*]], align 16
  ; STORE-NEXT:    ret void
  ;
@@ -1422,6 +1496,20 @@ define void @i32_red_example16(i32* %res) {
  ; STORE-LABEL: @i32_red_example16(
  ; STORE-NEXT:  entry:
  ; STORE-NEXT:    [[TMP0:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr_i32 to <16 x i32>*), align 16
+; STORE-NEXT:    [[ADD:%.*]] = add nsw i32 undef, undef
+; STORE-NEXT:    [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
+; STORE-NEXT:    [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
+; STORE-NEXT:    [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
+; STORE-NEXT:    [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
+; STORE-NEXT:    [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
+; STORE-NEXT:    [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
+; STORE-NEXT:    [[ADD_7:%.*]] = add nsw i32 undef, [[ADD_6]]
+; STORE-NEXT:    [[ADD_8:%.*]] = add nsw i32 undef, [[ADD_7]]
+; STORE-NEXT:    [[ADD_9:%.*]] = add nsw i32 undef, [[ADD_8]]
+; STORE-NEXT:    [[ADD_10:%.*]] = add nsw i32 undef, [[ADD_9]]
+; STORE-NEXT:    [[ADD_11:%.*]] = add nsw i32 undef, [[ADD_10]]
+; STORE-NEXT:    [[ADD_12:%.*]] = add nsw i32 undef, [[ADD_11]]
+; STORE-NEXT:    [[ADD_13:%.*]] = add nsw i32 undef, [[ADD_12]]
  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; STORE-NEXT:    [[BIN_RDX:%.*]] = add nsw <16 x i32> [[TMP0]], [[RDX_SHUF]]
  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[BIN_RDX]], <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -1431,6 +1519,7 @@ define void @i32_red_example16(i32* %res) {
  ; STORE-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <16 x i32> [[BIN_RDX4]], <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; STORE-NEXT:    [[BIN_RDX6:%.*]] = add nsw <16 x i32> [[BIN_RDX4]], [[RDX_SHUF5]]
  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <16 x i32> [[BIN_RDX6]], i32 0
+; STORE-NEXT:    [[ADD_14:%.*]] = add nsw i32 undef, [[ADD_13]]
  ; STORE-NEXT:    store i32 [[TMP1]], i32* [[RES:%.*]], align 16
  ; STORE-NEXT:    ret void
  ;
@@ -1542,6 +1631,36 @@ define void @i32_red_example32(i32* %res) {
  ; STORE-LABEL: @i32_red_example32(
  ; STORE-NEXT:  entry:
  ; STORE-NEXT:    [[TMP0:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr_i32 to <32 x i32>*), align 16
+; STORE-NEXT:    [[ADD:%.*]] = add nsw i32 undef, undef
+; STORE-NEXT:    [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
+; STORE-NEXT:    [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
+; STORE-NEXT:    [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
+; STORE-NEXT:    [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
+; STORE-NEXT:    [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
+; STORE-NEXT:    [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
+; STORE-NEXT:    [[ADD_7:%.*]] = add nsw i32 undef, [[ADD_6]]
+; STORE-NEXT:    [[ADD_8:%.*]] = add nsw i32 undef, [[ADD_7]]
+; STORE-NEXT:    [[ADD_9:%.*]] = add nsw i32 undef, [[ADD_8]]
+; STORE-NEXT:    [[ADD_10:%.*]] = add nsw i32 undef, [[ADD_9]]
+; STORE-NEXT:    [[ADD_11:%.*]] = add nsw i32 undef, [[ADD_10]]
+; STORE-NEXT:    [[ADD_12:%.*]] = add nsw i32 undef, [[ADD_11]]
+; STORE-NEXT:    [[ADD_13:%.*]] = add nsw i32 undef, [[ADD_12]]
+; STORE-NEXT:    [[ADD_14:%.*]] = add nsw i32 undef, [[ADD_13]]
+; STORE-NEXT:    [[ADD_15:%.*]] = add nsw i32 undef, [[ADD_14]]
+; STORE-NEXT:    [[ADD_16:%.*]] = add nsw i32 undef, [[ADD_15]]
+; STORE-NEXT:    [[ADD_17:%.*]] = add nsw i32 undef, [[ADD_16]]
+; STORE-NEXT:    [[ADD_18:%.*]] = add nsw i32 undef, [[ADD_17]]
+; STORE-NEXT:    [[ADD_19:%.*]] = add nsw i32 undef, [[ADD_18]]
+; STORE-NEXT:    [[ADD_20:%.*]] = add nsw i32 undef, [[ADD_19]]
+; STORE-NEXT:    [[ADD_21:%.*]] = add nsw i32 undef, [[ADD_20]]
+; STORE-NEXT:    [[ADD_22:%.*]] = add nsw i32 undef, [[ADD_21]]
+; STORE-NEXT:    [[ADD_23:%.*]] = add nsw i32 undef, [[ADD_22]]
+; STORE-NEXT:    [[ADD_24:%.*]] = add nsw i32 undef, [[ADD_23]]
+; STORE-NEXT:    [[ADD_25:%.*]] = add nsw i32 undef, [[ADD_24]]
+; STORE-NEXT:    [[ADD_26:%.*]] = add nsw i32 undef, [[ADD_25]]
+; STORE-NEXT:    [[ADD_27:%.*]] = add nsw i32 undef, [[ADD_26]]
+; STORE-NEXT:    [[ADD_28:%.*]] = add nsw i32 undef, [[ADD_27]]
+; STORE-NEXT:    [[ADD_29:%.*]] = add nsw i32 undef, [[ADD_28]]
  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; STORE-NEXT:    [[BIN_RDX:%.*]] = add nsw <32 x i32> [[TMP0]], [[RDX_SHUF]]
  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <32 x i32> [[BIN_RDX]], <32 x i32> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -1553,6 +1672,7 @@ define void @i32_red_example32(i32* %res) {
  ; STORE-NEXT:    [[RDX_SHUF7:%.*]] = shufflevector <32 x i32> [[BIN_RDX6]], <32 x i32> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; STORE-NEXT:    [[BIN_RDX8:%.*]] = add nsw <32 x i32> [[BIN_RDX6]], [[RDX_SHUF7]]
  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <32 x i32> [[BIN_RDX8]], i32 0
+; STORE-NEXT:    [[ADD_30:%.*]] = add nsw i32 undef, [[ADD_29]]
  ; STORE-NEXT:    store i32 [[TMP1]], i32* [[RES:%.*]], align 16
  ; STORE-NEXT:    ret void
  ;
@@ -1630,6 +1750,12 @@ define void @i32_red_call(i32 %val) {
  ; CHECK-LABEL: @i32_red_call(
  ; CHECK-NEXT:  entry:
  ; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 undef, undef
+; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
+; CHECK-NEXT:    [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
+; CHECK-NEXT:    [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
+; CHECK-NEXT:    [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
+; CHECK-NEXT:    [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -1637,12 +1763,19 @@ define void @i32_red_call(i32 %val) {
  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
  ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
+; CHECK-NEXT:    [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
  ; CHECK-NEXT:    [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]])
  ; CHECK-NEXT:    ret void
  ;
  ; STORE-LABEL: @i32_red_call(
  ; STORE-NEXT:  entry:
  ; STORE-NEXT:    [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
+; STORE-NEXT:    [[ADD:%.*]] = add nsw i32 undef, undef
+; STORE-NEXT:    [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
+; STORE-NEXT:    [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
+; STORE-NEXT:    [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
+; STORE-NEXT:    [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
+; STORE-NEXT:    [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; STORE-NEXT:    [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]]
  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -1650,6 +1783,7 @@ define void @i32_red_call(i32 %val) {
  ; STORE-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; STORE-NEXT:    [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
+; STORE-NEXT:    [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
  ; STORE-NEXT:    [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]])
  ; STORE-NEXT:    ret void
  ;
@@ -1677,6 +1811,12 @@ define void @i32_red_invoke(i32 %val) personality i32 (...)* @__gxx_personality_
  ; CHECK-LABEL: @i32_red_invoke(
  ; CHECK-NEXT:  entry:
  ; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 undef, undef
+; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
+; CHECK-NEXT:    [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
+; CHECK-NEXT:    [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
+; CHECK-NEXT:    [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
+; CHECK-NEXT:    [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -1684,6 +1824,7 @@ define void @i32_red_invoke(i32 %val) personality i32 (...)* @__gxx_personality_
  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
  ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
+; CHECK-NEXT:    [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
  ; CHECK-NEXT:    [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]])
  ; CHECK-NEXT:    to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]]
  ; CHECK:       exception:
@@ -1696,6 +1837,12 @@ define void @i32_red_invoke(i32 %val) personality i32 (...)* @__gxx_personality_
  ; STORE-LABEL: @i32_red_invoke(
  ; STORE-NEXT:  entry:
  ; STORE-NEXT:    [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
+; STORE-NEXT:    [[ADD:%.*]] = add nsw i32 undef, undef
+; STORE-NEXT:    [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
+; STORE-NEXT:    [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
+; STORE-NEXT:    [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
+; STORE-NEXT:    [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
+; STORE-NEXT:    [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
  ; STORE-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; STORE-NEXT:    [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]]
  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -1703,6 +1850,7 @@ define void @i32_red_invoke(i32 %val) personality i32 (...)* @__gxx_personality_
  ; STORE-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; STORE-NEXT:    [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
+; STORE-NEXT:    [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
  ; STORE-NEXT:    [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]])
  ; STORE-NEXT:    to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]]
  ; STORE:       exception:
diff --git a/test/Transforms/SLPVectorizer/X86/long_chains.ll b/test/Transforms/SLPVectorizer/X86/long_chains.ll

index 52c1c6bee1216c14d42a463e3cfad73cb131ca8f..ffbdd9f1d148d45d940009ff366c42e97d76acb8 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/long_chains.ll
+++ b/test/Transforms/SLPVectorizer/X86/long_chains.ll
@@ -12,10 +12,10 @@ define i32 @test(double* nocapture %A, i8* nocapture %B) {
  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[B:%.*]] to <2 x i8>*
  ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* [[TMP0]], align 1
  ; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i8> [[TMP1]], <i8 3, i8 3>
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i32 1
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i8> [[TMP2]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i8> undef, i8 [[TMP4]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i8> [[TMP5]], i8 [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i8> undef, i8 [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i8> [[TMP4]], i8 [[TMP5]], i32 1
  ; CHECK-NEXT:    [[TMP7:%.*]] = sitofp <2 x i8> [[TMP6]] to <2 x double>
  ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP7]]
  ; CHECK-NEXT:    [[TMP9:%.*]] = fadd <2 x double> [[TMP8]], <double 1.000000e+00, double 1.000000e+00>
diff --git a/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll b/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll

index 0b8c72b35a2a7eef0c1046877255aa92b39150f6..13bbe5e579812407a9adad4b1485b45e57272069 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll
+++ b/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll
@@ -5,6 +5,36 @@ define signext i8 @Foo(<32 x i8>* %__v) {
  ; CHECK-LABEL: @Foo(
  ; CHECK-NEXT:  entry:
  ; CHECK-NEXT:    [[TMP0:%.*]] = load <32 x i8>, <32 x i8>* [[__V:%.*]], align 32
+; CHECK-NEXT:    [[ADD_I_1_I:%.*]] = add i8 undef, undef
+; CHECK-NEXT:    [[ADD_I_2_I:%.*]] = add i8 [[ADD_I_1_I]], undef
+; CHECK-NEXT:    [[ADD_I_3_I:%.*]] = add i8 [[ADD_I_2_I]], undef
+; CHECK-NEXT:    [[ADD_I_4_I:%.*]] = add i8 [[ADD_I_3_I]], undef
+; CHECK-NEXT:    [[ADD_I_5_I:%.*]] = add i8 [[ADD_I_4_I]], undef
+; CHECK-NEXT:    [[ADD_I_6_I:%.*]] = add i8 [[ADD_I_5_I]], undef
+; CHECK-NEXT:    [[ADD_I_7_I:%.*]] = add i8 [[ADD_I_6_I]], undef
+; CHECK-NEXT:    [[ADD_I_8_I:%.*]] = add i8 [[ADD_I_7_I]], undef
+; CHECK-NEXT:    [[ADD_I_9_I:%.*]] = add i8 [[ADD_I_8_I]], undef
+; CHECK-NEXT:    [[ADD_I_10_I:%.*]] = add i8 [[ADD_I_9_I]], undef
+; CHECK-NEXT:    [[ADD_I_11_I:%.*]] = add i8 [[ADD_I_10_I]], undef
+; CHECK-NEXT:    [[ADD_I_12_I:%.*]] = add i8 [[ADD_I_11_I]], undef
+; CHECK-NEXT:    [[ADD_I_13_I:%.*]] = add i8 [[ADD_I_12_I]], undef
+; CHECK-NEXT:    [[ADD_I_14_I:%.*]] = add i8 [[ADD_I_13_I]], undef
+; CHECK-NEXT:    [[ADD_I_15_I:%.*]] = add i8 [[ADD_I_14_I]], undef
+; CHECK-NEXT:    [[ADD_I_16_I:%.*]] = add i8 [[ADD_I_15_I]], undef
+; CHECK-NEXT:    [[ADD_I_17_I:%.*]] = add i8 [[ADD_I_16_I]], undef
+; CHECK-NEXT:    [[ADD_I_18_I:%.*]] = add i8 [[ADD_I_17_I]], undef
+; CHECK-NEXT:    [[ADD_I_19_I:%.*]] = add i8 [[ADD_I_18_I]], undef
+; CHECK-NEXT:    [[ADD_I_20_I:%.*]] = add i8 [[ADD_I_19_I]], undef
+; CHECK-NEXT:    [[ADD_I_21_I:%.*]] = add i8 [[ADD_I_20_I]], undef
+; CHECK-NEXT:    [[ADD_I_22_I:%.*]] = add i8 [[ADD_I_21_I]], undef
+; CHECK-NEXT:    [[ADD_I_23_I:%.*]] = add i8 [[ADD_I_22_I]], undef
+; CHECK-NEXT:    [[ADD_I_24_I:%.*]] = add i8 [[ADD_I_23_I]], undef
+; CHECK-NEXT:    [[ADD_I_25_I:%.*]] = add i8 [[ADD_I_24_I]], undef
+; CHECK-NEXT:    [[ADD_I_26_I:%.*]] = add i8 [[ADD_I_25_I]], undef
+; CHECK-NEXT:    [[ADD_I_27_I:%.*]] = add i8 [[ADD_I_26_I]], undef
+; CHECK-NEXT:    [[ADD_I_28_I:%.*]] = add i8 [[ADD_I_27_I]], undef
+; CHECK-NEXT:    [[ADD_I_29_I:%.*]] = add i8 [[ADD_I_28_I]], undef
+; CHECK-NEXT:    [[ADD_I_30_I:%.*]] = add i8 [[ADD_I_29_I]], undef
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x i8> [[TMP0]], <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <32 x i8> [[TMP0]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <32 x i8> [[BIN_RDX]], <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -16,6 +46,7 @@ define signext i8 @Foo(<32 x i8>* %__v) {
  ; CHECK-NEXT:    [[RDX_SHUF7:%.*]] = shufflevector <32 x i8> [[BIN_RDX6]], <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX8:%.*]] = add <32 x i8> [[BIN_RDX6]], [[RDX_SHUF7]]
  ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <32 x i8> [[BIN_RDX8]], i32 0
+; CHECK-NEXT:    [[ADD_I_31_I:%.*]] = add i8 [[ADD_I_30_I]], undef
  ; CHECK-NEXT:    ret i8 [[TMP1]]
  ;
  entry:
diff --git a/test/Transforms/SLPVectorizer/X86/reduction_loads.ll b/test/Transforms/SLPVectorizer/X86/reduction_loads.ll

index 15ede8156fa5b5dd5972309118bdf617787610d5..56539ab928eab35207dfdffd8260a6b64e618334 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
+++ b/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
@@ -35,6 +35,13 @@ define i32 @test(i32* nocapture readonly %p) {
  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
  ; CHECK-NEXT:    [[TMP2:%.*]] = mul <8 x i32> [[TMP1]], <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 undef, [[SUM]]
+; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 undef, [[ADD]]
+; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 undef, [[ADD_1]]
+; CHECK-NEXT:    [[ADD_3:%.*]] = add i32 undef, [[ADD_2]]
+; CHECK-NEXT:    [[ADD_4:%.*]] = add i32 undef, [[ADD_3]]
+; CHECK-NEXT:    [[ADD_5:%.*]] = add i32 undef, [[ADD_4]]
+; CHECK-NEXT:    [[ADD_6:%.*]] = add i32 undef, [[ADD_5]]
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <8 x i32> [[TMP2]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -43,6 +50,7 @@ define i32 @test(i32* nocapture readonly %p) {
  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
  ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
  ; CHECK-NEXT:    [[OP_EXTRA]] = add i32 [[TMP3]], [[SUM]]
+; CHECK-NEXT:    [[ADD_7:%.*]] = add i32 undef, [[ADD_6]]
  ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]]
  ; CHECK:       for.end:
  ; CHECK-NEXT:    ret i32 [[OP_EXTRA]]
@@ -130,6 +138,13 @@ define i32 @test2(i32* nocapture readonly %p, i32* nocapture readonly %q) {
  ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[Q]] to <8 x i32>*
  ; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4
  ; CHECK-NEXT:    [[TMP4:%.*]] = mul <8 x i32> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 undef, [[SUM]]
+; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 undef, [[ADD]]
+; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 undef, [[ADD_1]]
+; CHECK-NEXT:    [[ADD_3:%.*]] = add i32 undef, [[ADD_2]]
+; CHECK-NEXT:    [[ADD_4:%.*]] = add i32 undef, [[ADD_3]]
+; CHECK-NEXT:    [[ADD_5:%.*]] = add i32 undef, [[ADD_4]]
+; CHECK-NEXT:    [[ADD_6:%.*]] = add i32 undef, [[ADD_5]]
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <8 x i32> [[TMP4]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -138,6 +153,7 @@ define i32 @test2(i32* nocapture readonly %p, i32* nocapture readonly %q) {
  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
  ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
  ; CHECK-NEXT:    [[OP_EXTRA]] = add i32 [[TMP5]], [[SUM]]
+; CHECK-NEXT:    [[ADD_7:%.*]] = add i32 undef, [[ADD_6]]
  ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]]
  ; CHECK:       for.end:
  ; CHECK-NEXT:    ret i32 [[OP_EXTRA]]
@@ -242,6 +258,13 @@ define i32 @test3(i32* nocapture readonly %p, i32* nocapture readonly %q) {
  ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[Q]] to <8 x i32>*
  ; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4
  ; CHECK-NEXT:    [[TMP4:%.*]] = mul <8 x i32> [[REORDER_SHUFFLE]], [[TMP3]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 undef, [[SUM]]
+; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 undef, [[ADD]]
+; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 undef, [[ADD_1]]
+; CHECK-NEXT:    [[ADD_3:%.*]] = add i32 undef, [[ADD_2]]
+; CHECK-NEXT:    [[ADD_4:%.*]] = add i32 undef, [[ADD_3]]
+; CHECK-NEXT:    [[ADD_5:%.*]] = add i32 undef, [[ADD_4]]
+; CHECK-NEXT:    [[ADD_6:%.*]] = add i32 undef, [[ADD_5]]
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <8 x i32> [[TMP4]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -250,6 +273,7 @@ define i32 @test3(i32* nocapture readonly %p, i32* nocapture readonly %q) {
  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
  ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
  ; CHECK-NEXT:    [[OP_EXTRA]] = add i32 [[TMP5]], [[SUM]]
+; CHECK-NEXT:    [[ADD_7:%.*]] = add i32 undef, [[ADD_6]]
  ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]]
  ; CHECK:       for.end:
  ; CHECK-NEXT:    ret i32 [[OP_EXTRA]]
diff --git a/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll b/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll

index 7fdc95392235798faa967d8e0a40081aaf5119d3..4dd40876a70a8b56f226d5fd8eff1c1a5200cff9 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
+++ b/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
@@ -26,6 +26,12 @@ define i32 @test_add(i32* nocapture readonly %p) {
  ; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[MUL_18:%.*]] = add i32 undef, undef
+; CHECK-NEXT:    [[MUL_29:%.*]] = add i32 undef, [[MUL_18]]
+; CHECK-NEXT:    [[MUL_310:%.*]] = add i32 undef, [[MUL_29]]
+; CHECK-NEXT:    [[MUL_411:%.*]] = add i32 undef, [[MUL_310]]
+; CHECK-NEXT:    [[MUL_512:%.*]] = add i32 undef, [[MUL_411]]
+; CHECK-NEXT:    [[MUL_613:%.*]] = add i32 undef, [[MUL_512]]
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <8 x i32> [[TMP1]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -33,6 +39,7 @@ define i32 @test_add(i32* nocapture readonly %p) {
  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
  ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
+; CHECK-NEXT:    [[MUL_714:%.*]] = add i32 undef, [[MUL_613]]
  ; CHECK-NEXT:    ret i32 [[TMP2]]
  ;
  entry:
@@ -140,6 +147,12 @@ define i32 @test_and(i32* nocapture readonly %p) {
  ; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[MUL_18:%.*]] = and i32 undef, undef
+; CHECK-NEXT:    [[MUL_29:%.*]] = and i32 undef, [[MUL_18]]
+; CHECK-NEXT:    [[MUL_310:%.*]] = and i32 undef, [[MUL_29]]
+; CHECK-NEXT:    [[MUL_411:%.*]] = and i32 undef, [[MUL_310]]
+; CHECK-NEXT:    [[MUL_512:%.*]] = and i32 undef, [[MUL_411]]
+; CHECK-NEXT:    [[MUL_613:%.*]] = and i32 undef, [[MUL_512]]
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = and <8 x i32> [[TMP1]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -147,6 +160,7 @@ define i32 @test_and(i32* nocapture readonly %p) {
  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
  ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
+; CHECK-NEXT:    [[MUL_714:%.*]] = and i32 undef, [[MUL_613]]
  ; CHECK-NEXT:    ret i32 [[TMP2]]
  ;
  entry:
@@ -194,6 +208,12 @@ define i32 @test_or(i32* nocapture readonly %p) {
  ; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[MUL_18:%.*]] = or i32 undef, undef
+; CHECK-NEXT:    [[MUL_29:%.*]] = or i32 undef, [[MUL_18]]
+; CHECK-NEXT:    [[MUL_310:%.*]] = or i32 undef, [[MUL_29]]
+; CHECK-NEXT:    [[MUL_411:%.*]] = or i32 undef, [[MUL_310]]
+; CHECK-NEXT:    [[MUL_512:%.*]] = or i32 undef, [[MUL_411]]
+; CHECK-NEXT:    [[MUL_613:%.*]] = or i32 undef, [[MUL_512]]
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <8 x i32> [[TMP1]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -201,6 +221,7 @@ define i32 @test_or(i32* nocapture readonly %p) {
  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = or <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
  ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
+; CHECK-NEXT:    [[MUL_714:%.*]] = or i32 undef, [[MUL_613]]
  ; CHECK-NEXT:    ret i32 [[TMP2]]
  ;
  entry:
@@ -248,6 +269,12 @@ define i32 @test_xor(i32* nocapture readonly %p) {
  ; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[MUL_18:%.*]] = xor i32 undef, undef
+; CHECK-NEXT:    [[MUL_29:%.*]] = xor i32 undef, [[MUL_18]]
+; CHECK-NEXT:    [[MUL_310:%.*]] = xor i32 undef, [[MUL_29]]
+; CHECK-NEXT:    [[MUL_411:%.*]] = xor i32 undef, [[MUL_310]]
+; CHECK-NEXT:    [[MUL_512:%.*]] = xor i32 undef, [[MUL_411]]
+; CHECK-NEXT:    [[MUL_613:%.*]] = xor i32 undef, [[MUL_512]]
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = xor <8 x i32> [[TMP1]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -255,6 +282,7 @@ define i32 @test_xor(i32* nocapture readonly %p) {
  ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = xor <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
  ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
+; CHECK-NEXT:    [[MUL_714:%.*]] = xor i32 undef, [[MUL_613]]
  ; CHECK-NEXT:    ret i32 [[TMP2]]
  ;
  entry:
@@ -294,12 +322,15 @@ define i32 @PR37731(<4 x i32>* noalias nocapture dereferenceable(16) %self) unna
  ; CHECK-NEXT:    [[TMP5:%.*]] = shl <4 x i32> [[TMP4]], <i32 18, i32 2, i32 7, i32 13>
  ; CHECK-NEXT:    [[TMP6:%.*]] = xor <4 x i32> [[TMP3]], [[TMP5]]
  ; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[SELF]], align 16
+; CHECK-NEXT:    [[TMP7:%.*]] = xor i32 undef, undef
+; CHECK-NEXT:    [[TMP8:%.*]] = xor i32 [[TMP7]], undef
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX:%.*]] = xor <4 x i32> [[TMP6]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = xor <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
-; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
-; CHECK-NEXT:    ret i32 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
+; CHECK-NEXT:    [[TMP10:%.*]] = xor i32 [[TMP8]], undef
+; CHECK-NEXT:    ret i32 [[TMP9]]
  ;
  entry:
    %0 = load <4 x i32>, <4 x i32>* %self, align 16
diff --git a/test/Transforms/SLPVectorizer/X86/remark_horcost.ll b/test/Transforms/SLPVectorizer/X86/remark_horcost.ll

index 4a69a35420b4be53359f61920656f20bcf96e66a..27997f6af3d73b988c32b2d2df8a1add92fcb546 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
+++ b/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
@@ -33,8 +33,11 @@ define i32 @foo(i32* %diff) #0 {
  ; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i32* [[ARRAYIDX2]] to <4 x i32>*
  ; CHECK-NEXT:    [[TMP12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP11]], align 4
  ; CHECK-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[TMP12]], [[TMP9]]
+; CHECK-NEXT:    [[ADD10:%.*]] = add nsw i32 undef, [[A_088]]
  ; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 1
+; CHECK-NEXT:    [[ADD24:%.*]] = add nsw i32 [[ADD10]], undef
  ; CHECK-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 2
+; CHECK-NEXT:    [[ADD38:%.*]] = add nsw i32 [[ADD24]], undef
  ; CHECK-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 3
  ; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i32* [[ARRAYIDX6]] to <4 x i32>*
  ; CHECK-NEXT:    store <4 x i32> [[TMP13]], <4 x i32>* [[TMP14]], align 16
@@ -44,6 +47,7 @@ define i32 @foo(i32* %diff) #0 {
  ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = add nsw <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
  ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
  ; CHECK-NEXT:    [[OP_EXTRA]] = add nsw i32 [[TMP15]], [[A_088]]
+; CHECK-NEXT:    [[ADD52:%.*]] = add nsw i32 [[ADD38]], undef
  ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
  ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8
  ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
diff --git a/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll b/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll

index 768cba9c37bb6bfe0e02b5a8f67db770ca76c77f..13884efd98dd89dacde361c51a2c16ad93309ed4 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
+++ b/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
@@ -19,6 +19,11 @@ define void @hoge() {
  ; CHECK-NEXT:    [[TMP4:%.*]] = sub <2 x i32> [[TMP3]], undef
  ; CHECK-NEXT:    [[SHUFFLE8:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
  ; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[SHUFFLE8]], <i32 undef, i32 15, i32 31, i32 47>
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp sgt i32 undef, undef
+; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 undef, i32 undef
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], undef
+; CHECK-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], undef
  ; CHECK-NEXT:    [[RDX_SHUF9:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[RDX_MINMAX_CMP10:%.*]] = icmp sgt <4 x i32> [[TMP5]], [[RDX_SHUF9]]
  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT11:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP10]], <4 x i32> [[TMP5]], <4 x i32> [[RDX_SHUF9]]
@@ -26,12 +31,28 @@ define void @hoge() {
  ; CHECK-NEXT:    [[RDX_MINMAX_CMP13:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT11]], [[RDX_SHUF12]]
  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT14:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP13]], <4 x i32> [[RDX_MINMAX_SELECT11]], <4 x i32> [[RDX_SHUF12]]
  ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT14]], i32 0
+; CHECK-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef
  ; CHECK-NEXT:    [[TMP19:%.*]] = select i1 undef, i32 [[TMP6]], i32 undef
  ; CHECK-NEXT:    [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], 63
  ; CHECK-NEXT:    [[TMP7:%.*]] = sub nsw <2 x i32> undef, [[TMP2]]
  ; CHECK-NEXT:    [[TMP8:%.*]] = sub <2 x i32> [[TMP7]], undef
  ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ; CHECK-NEXT:    [[TMP9:%.*]] = add nsw <4 x i32> [[SHUFFLE]], <i32 -49, i32 -33, i32 -33, i32 -17>
+; CHECK-NEXT:    [[TMP26:%.*]] = icmp sgt i32 undef, undef
+; CHECK-NEXT:    [[TMP27:%.*]] = select i1 [[TMP26]], i32 undef, i32 undef
+; CHECK-NEXT:    [[TMP28:%.*]] = icmp sgt i32 [[TMP27]], undef
+; CHECK-NEXT:    [[TMP29:%.*]] = select i1 [[TMP28]], i32 undef, i32 [[TMP27]]
+; CHECK-NEXT:    [[TMP31:%.*]] = icmp sgt i32 undef, undef
+; CHECK-NEXT:    [[TMP32:%.*]] = select i1 [[TMP31]], i32 undef, i32 undef
+; CHECK-NEXT:    [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], [[TMP29]]
+; CHECK-NEXT:    [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP29]], i32 [[TMP32]]
+; CHECK-NEXT:    [[TMP36:%.*]] = icmp sgt i32 undef, undef
+; CHECK-NEXT:    [[TMP37:%.*]] = select i1 [[TMP36]], i32 undef, i32 undef
+; CHECK-NEXT:    [[TMP38:%.*]] = icmp sgt i32 [[TMP37]], [[TMP34]]
+; CHECK-NEXT:    [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP34]], i32 [[TMP37]]
+; CHECK-NEXT:    [[TMP41:%.*]] = icmp sgt i32 undef, undef
+; CHECK-NEXT:    [[TMP42:%.*]] = select i1 [[TMP41]], i32 undef, i32 undef
+; CHECK-NEXT:    [[TMP43:%.*]] = icmp sgt i32 [[TMP42]], [[TMP39]]
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP9]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP9]], <4 x i32> [[RDX_SHUF]]
@@ -49,6 +70,7 @@ define void @hoge() {
  ; CHECK-NEXT:    [[OP_EXTRA6:%.*]] = select i1 [[TMP14]], i32 [[OP_EXTRA5]], i32 undef
  ; CHECK-NEXT:    [[TMP15:%.*]] = icmp slt i32 [[OP_EXTRA6]], undef
  ; CHECK-NEXT:    [[OP_EXTRA7:%.*]] = select i1 [[TMP15]], i32 [[OP_EXTRA6]], i32 undef
+; CHECK-NEXT:    [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP39]], i32 [[TMP42]]
  ; CHECK-NEXT:    [[TMP45:%.*]] = icmp sgt i32 undef, [[OP_EXTRA7]]
  ; CHECK-NEXT:    unreachable
  ;
diff --git a/test/Transforms/SLPVectorizer/X86/undef_vect.ll b/test/Transforms/SLPVectorizer/X86/undef_vect.ll

index 822c362ec5294e0e5b867da02dcb949831eef852..7ecd5805f931317a5b5f550b7d345ab2465cefbc 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/undef_vect.ll
+++ b/test/Transforms/SLPVectorizer/X86/undef_vect.ll
@@ -16,6 +16,15 @@ define void @_Z2azv() local_unnamed_addr {
  ; CHECK-NEXT:    [[DOTSROA_RAW_IDX_7:%.*]] = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76"* undef, i64 7, i32 1
  ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[DOTSROA_CAST_4]] to <8 x i32>*
  ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[CMP_I1_4:%.*]] = icmp slt i32 undef, undef
+; CHECK-NEXT:    [[DOTSROA_SPECULATED_4:%.*]] = select i1 [[CMP_I1_4]], i32 undef, i32 undef
+; CHECK-NEXT:    [[CMP_I1_5:%.*]] = icmp slt i32 [[DOTSROA_SPECULATED_4]], undef
+; CHECK-NEXT:    [[DOTSROA_SPECULATED_5:%.*]] = select i1 [[CMP_I1_5]], i32 undef, i32 [[DOTSROA_SPECULATED_4]]
+; CHECK-NEXT:    [[CMP_I1_6:%.*]] = icmp slt i32 [[DOTSROA_SPECULATED_5]], undef
+; CHECK-NEXT:    [[DOTSROA_SPECULATED_6:%.*]] = select i1 [[CMP_I1_6]], i32 undef, i32 [[DOTSROA_SPECULATED_5]]
+; CHECK-NEXT:    [[CMP_I1_7:%.*]] = icmp slt i32 [[DOTSROA_SPECULATED_6]], undef
+; CHECK-NEXT:    [[DOTSROA_SPECULATED_7:%.*]] = select i1 [[CMP_I1_7]], i32 undef, i32 [[DOTSROA_SPECULATED_6]]
+; CHECK-NEXT:    [[CMP_I1_8:%.*]] = icmp slt i32 undef, undef
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP1]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP1]], <8 x i32> [[RDX_SHUF]]
@@ -30,6 +39,7 @@ define void @_Z2azv() local_unnamed_addr {
  ; CHECK-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 undef
  ; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[OP_EXTRA]], undef
  ; CHECK-NEXT:    [[OP_EXTRA7:%.*]] = select i1 [[TMP4]], i32 [[OP_EXTRA]], i32 undef
+; CHECK-NEXT:    [[DOTSROA_SPECULATED_8:%.*]] = select i1 [[CMP_I1_8]], i32 undef, i32 undef
  ; CHECK-NEXT:    [[DOTSROA_SPECULATED_9:%.*]] = select i1 undef, i32 undef, i32 [[OP_EXTRA7]]
  ; CHECK-NEXT:    [[CMP_I1_10:%.*]] = icmp slt i32 [[DOTSROA_SPECULATED_9]], undef
  ; CHECK-NEXT:    ret void
diff --git a/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll b/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll

index 779d5260506af76d0b166e2e79d1fcad31fc3301..889bba80b7a49e334850a3e03942875abc85a8c9 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
+++ b/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
@@ -18,6 +18,19 @@ define i32 @foo(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4
  ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6
  ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7
  ; CHECK-NEXT:    [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 undef, undef
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef
+; CHECK-NEXT:    [[CMP15:%.*]] = icmp ult i32 [[COND]], undef
+; CHECK-NEXT:    [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32 undef
+; CHECK-NEXT:    [[CMP20:%.*]] = icmp ult i32 [[COND19]], undef
+; CHECK-NEXT:    [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]], i32 undef
+; CHECK-NEXT:    [[CMP25:%.*]] = icmp ult i32 [[COND24]], undef
+; CHECK-NEXT:    [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]], i32 undef
+; CHECK-NEXT:    [[CMP30:%.*]] = icmp ult i32 [[COND29]], undef
+; CHECK-NEXT:    [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]], i32 undef
+; CHECK-NEXT:    [[CMP35:%.*]] = icmp ult i32 [[COND34]], undef
+; CHECK-NEXT:    [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]], i32 undef
+; CHECK-NEXT:    [[CMP40:%.*]] = icmp ult i32 [[COND39]], undef
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i32> [[TMP10]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP10]], <8 x i32> [[RDX_SHUF]]
@@ -28,6 +41,7 @@ define i32 @foo(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4
  ; CHECK-NEXT:    [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]]
  ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0
+; CHECK-NEXT:    [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]], i32 undef
  ; CHECK-NEXT:    ret i32 [[TMP11]]
  ;
  entry:
@@ -78,6 +92,19 @@ define i32 @foo1(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a
  ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6
  ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7
  ; CHECK-NEXT:    [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 undef, undef
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef
+; CHECK-NEXT:    [[CMP15:%.*]] = icmp ult i32 [[COND]], undef
+; CHECK-NEXT:    [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32 undef
+; CHECK-NEXT:    [[CMP20:%.*]] = icmp ult i32 [[COND19]], undef
+; CHECK-NEXT:    [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]], i32 undef
+; CHECK-NEXT:    [[CMP25:%.*]] = icmp ult i32 [[COND24]], undef
+; CHECK-NEXT:    [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]], i32 undef
+; CHECK-NEXT:    [[CMP30:%.*]] = icmp ult i32 [[COND29]], undef
+; CHECK-NEXT:    [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]], i32 undef
+; CHECK-NEXT:    [[CMP35:%.*]] = icmp ult i32 [[COND34]], undef
+; CHECK-NEXT:    [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]], i32 undef
+; CHECK-NEXT:    [[CMP40:%.*]] = icmp ult i32 [[COND39]], undef
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i32> [[TMP10]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP10]], <8 x i32> [[RDX_SHUF]]
@@ -88,6 +115,7 @@ define i32 @foo1(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a
  ; CHECK-NEXT:    [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]]
  ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0
+; CHECK-NEXT:    [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]], i32 undef
  ; CHECK-NEXT:    ret i32 [[TMP11]]
  ;
  entry:
@@ -142,6 +170,19 @@ define i32 @foo2(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a
  ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6
  ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7
  ; CHECK-NEXT:    [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 undef, undef
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef
+; CHECK-NEXT:    [[CMP15:%.*]] = icmp ult i32 [[COND]], undef
+; CHECK-NEXT:    [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32 undef
+; CHECK-NEXT:    [[CMP20:%.*]] = icmp ult i32 [[COND19]], undef
+; CHECK-NEXT:    [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]], i32 undef
+; CHECK-NEXT:    [[CMP25:%.*]] = icmp ult i32 [[COND24]], undef
+; CHECK-NEXT:    [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]], i32 undef
+; CHECK-NEXT:    [[CMP30:%.*]] = icmp ult i32 [[COND29]], undef
+; CHECK-NEXT:    [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]], i32 undef
+; CHECK-NEXT:    [[CMP35:%.*]] = icmp ult i32 [[COND34]], undef
+; CHECK-NEXT:    [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]], i32 undef
+; CHECK-NEXT:    [[CMP40:%.*]] = icmp ult i32 [[COND39]], undef
  ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i32> [[TMP10]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP10]], <8 x i32> [[RDX_SHUF]]
@@ -152,6 +193,7 @@ define i32 @foo2(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a
  ; CHECK-NEXT:    [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
  ; CHECK-NEXT:    [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]]
  ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0
+; CHECK-NEXT:    [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]], i32 undef
  ; CHECK-NEXT:    ret i32 [[TMP11]]
  ;
  entry:
author	Jordan Rupprecht <rupprecht@google.com>
	Thu, 26 Sep 2019 22:09:17 +0000 (22:09 +0000)
committer	Jordan Rupprecht <rupprecht@google.com>
	Thu, 26 Sep 2019 22:09:17 +0000 (22:09 +0000)
include/llvm/Transforms/Vectorize/SLPVectorizer.h		patch \| blob \| history
lib/Transforms/Vectorize/SLPVectorizer.cpp		patch \| blob \| history
test/Transforms/SLPVectorizer/AArch64/gather-root.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/AArch64/horizontal.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/PR31847.ll	[deleted file]	patch \| blob \| history
test/Transforms/SLPVectorizer/X86/PR35628_1.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/PR35628_2.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/PR39774.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/PR40310.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/bad-reduction.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/horizontal-list.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/horizontal.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/long_chains.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/reassociated-loads.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/reduction_loads.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/remark_horcost.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/undef_vect.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll		patch \| blob \| history