From: Jordan Rupprecht Date: Thu, 26 Sep 2019 22:09:17 +0000 (+0000) Subject: Revert [SLP] Fix for PR31847: Assertion failed: (isLoopInvariant(Operands[i], L)... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=255118a958473ad4e8a2823f6e91615c0b20fcd2;p=llvm Revert [SLP] Fix for PR31847: Assertion failed: (isLoopInvariant(Operands[i], L) && "SCEVAddRecExpr operand is not loop-invariant!") This reverts r372626 (git commit 6a278d9073bdc158d31d4f4b15bbe34238f22c18) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@373019 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/include/llvm/Transforms/Vectorize/SLPVectorizer.h index 32ccc8a4638..ac6afb761d4 100644 --- a/include/llvm/Transforms/Vectorize/SLPVectorizer.h +++ b/include/llvm/Transforms/Vectorize/SLPVectorizer.h @@ -24,6 +24,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/ValueHandle.h" namespace llvm { @@ -59,8 +60,8 @@ extern cl::opt RunSLPVectorization; struct SLPVectorizerPass : public PassInfoMixin { using StoreList = SmallVector; using StoreListMap = MapVector; - using GEPList = SmallVector; - using GEPListMap = MapVector; + using WeakTrackingVHList = SmallVector; + using WeakTrackingVHListMap = MapVector; ScalarEvolution *SE = nullptr; TargetTransformInfo *TTI = nullptr; @@ -130,7 +131,7 @@ private: /// Tries to vectorize constructs started from CmpInst, InsertValueInst or /// InsertElementInst instructions. - bool vectorizeSimpleInstructions(SmallVectorImpl &Instructions, + bool vectorizeSimpleInstructions(SmallVectorImpl &Instructions, BasicBlock *BB, slpvectorizer::BoUpSLP &R); /// Scan the basic block and look for patterns that are likely to start @@ -146,7 +147,7 @@ private: StoreListMap Stores; /// The getelementptr instructions in a basic block organized by base pointer. - GEPListMap GEPs; + WeakTrackingVHListMap GEPs; }; } // end namespace llvm diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 71b0abbf739..12c114f83d4 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1121,14 +1121,6 @@ public: #endif }; - /// Checks if the instruction is marked for deletion. - bool isDeleted(Instruction *I) const { return DeletedInstructions.count(I); } - - /// Marks values for later deletion. - void eraseInstructions(ArrayRef AV); - - ~BoUpSLP(); - private: /// Checks if all users of \p I are the part of the vectorization tree. bool areAllUsersVectorized(Instruction *I) const; @@ -1499,12 +1491,14 @@ private: /// AliasCache, which can happen if a new instruction is allocated at the /// same address as a previously deleted instruction. void eraseInstruction(Instruction *I) { - DeletedInstructions.insert(I); + I->removeFromParent(); + I->dropAllReferences(); + DeletedInstructions.emplace_back(I); } /// Temporary store for deleted instructions. Instructions will be deleted /// eventually when the BoUpSLP is destructed. - SmallPtrSet DeletedInstructions; + SmallVector DeletedInstructions; /// A list of values that need to extracted out of the tree. /// This list holds pairs of (Internal Scalar : External User). External User @@ -2061,22 +2055,6 @@ template <> struct DOTGraphTraits : public DefaultDOTGraphTraits { } // end namespace llvm -BoUpSLP::~BoUpSLP() { - for (auto *I : DeletedInstructions) - I->dropAllReferences(); - for (auto *I : DeletedInstructions) { - assert(I->use_empty() && "trying to erase instruction with users."); - I->eraseFromParent(); - } -} - -void BoUpSLP::eraseInstructions(ArrayRef AV) { - for (auto *V : AV) { - if (auto *I = dyn_cast(V)) - eraseInstruction(I); - }; -} - void BoUpSLP::buildTree(ArrayRef Roots, ArrayRef UserIgnoreLst) { ExtraValueToDebugLocsMap ExternallyUsedValues; @@ -3563,7 +3541,7 @@ Value *BoUpSLP::Gather(ArrayRef VL, VectorType *Ty) { // Generate the 'InsertElement' instruction. for (unsigned i = 0; i < Ty->getNumElements(); ++i) { Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i)); - if (auto *Insrt = dyn_cast(Vec)) { + if (Instruction *Insrt = dyn_cast(Vec)) { GatherSeq.insert(Insrt); CSEBlocks.insert(Insrt->getParent()); @@ -4312,18 +4290,20 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) { Value *Scalar = Entry->Scalars[Lane]; -#ifndef NDEBUG Type *Ty = Scalar->getType(); if (!Ty->isVoidTy()) { +#ifndef NDEBUG for (User *U : Scalar->users()) { LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n"); - // It is legal to delete users in the ignorelist. + // It is legal to replace users in the ignorelist by undef. assert((getTreeEntry(U) || is_contained(UserIgnoreList, U)) && - "Deleting out-of-tree value"); + "Replacing out-of-tree value with undef"); } - } #endif + Value *Undef = UndefValue::get(Ty); + Scalar->replaceAllUsesWith(Undef); + } LLVM_DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n"); eraseInstruction(cast(Scalar)); } @@ -4339,7 +4319,7 @@ void BoUpSLP::optimizeGatherSequence() { << " gather sequences instructions.\n"); // LICM InsertElementInst sequences. for (Instruction *I : GatherSeq) { - if (isDeleted(I)) + if (!isa(I) && !isa(I)) continue; // Check if this block is inside a loop. @@ -4393,8 +4373,6 @@ void BoUpSLP::optimizeGatherSequence() { // For all instructions in blocks containing gather sequences: for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) { Instruction *In = &*it++; - if (isDeleted(In)) - continue; if (!isa(In) && !isa(In)) continue; @@ -5277,6 +5255,19 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, return Changed; } +/// Check that the Values in the slice in VL array are still existent in +/// the WeakTrackingVH array. +/// Vectorization of part of the VL array may cause later values in the VL array +/// to become invalid. We track when this has happened in the WeakTrackingVH +/// array. +static bool hasValueBeenRAUWed(ArrayRef VL, + ArrayRef VH, unsigned SliceBegin, + unsigned SliceSize) { + VL = VL.slice(SliceBegin, SliceSize); + VH = VH.slice(SliceBegin, SliceSize); + return !std::equal(VL.begin(), VL.end(), VH.begin()); +} + bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef Chain, BoUpSLP &R, unsigned VecRegSize) { const unsigned ChainLen = Chain.size(); @@ -5288,20 +5279,20 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef Chain, BoUpSLP &R, if (!isPowerOf2_32(Sz) || VF < 2) return false; + // Keep track of values that were deleted by vectorizing in the loop below. + const SmallVector TrackValues(Chain.begin(), Chain.end()); + bool Changed = false; // Look for profitable vectorizable trees at all offsets, starting at zero. for (unsigned i = 0, e = ChainLen; i + VF <= e; ++i) { - ArrayRef Operands = Chain.slice(i, VF); // Check that a previous iteration of this loop did not delete the Value. - if (llvm::any_of(Operands, [&R](Value *V) { - auto *I = dyn_cast(V); - return I && R.isDeleted(I); - })) + if (hasValueBeenRAUWed(Chain, TrackValues, i, VF)) continue; LLVM_DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i << "\n"); + ArrayRef Operands = Chain.slice(i, VF); R.buildTree(Operands); if (R.isTreeTinyAndNotFullyVectorizable()) @@ -5493,6 +5484,9 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, bool CandidateFound = false; int MinCost = SLPCostThreshold; + // Keep track of values that were deleted by vectorizing in the loop below. + SmallVector TrackValues(VL.begin(), VL.end()); + unsigned NextInst = 0, MaxInst = VL.size(); for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; VF /= 2) { // No actual vectorization should happen, if number of parts is the same as @@ -5512,16 +5506,13 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2) break; - ArrayRef Ops = VL.slice(I, OpsWidth); // Check that a previous iteration of this loop did not delete the Value. - if (llvm::any_of(Ops, [&R](Value *V) { - auto *I = dyn_cast(V); - return I && R.isDeleted(I); - })) + if (hasValueBeenRAUWed(VL, TrackValues, I, OpsWidth)) continue; LLVM_DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations " << "\n"); + ArrayRef Ops = VL.slice(I, OpsWidth); R.buildTree(Ops); Optional> Order = R.bestOrder(); @@ -5742,23 +5733,23 @@ class HorizontalReduction { case RK_Min: Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSLT(LHS, RHS) : Builder.CreateFCmpOLT(LHS, RHS); - return Builder.CreateSelect(Cmp, LHS, RHS, Name); + break; case RK_Max: Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSGT(LHS, RHS) : Builder.CreateFCmpOGT(LHS, RHS); - return Builder.CreateSelect(Cmp, LHS, RHS, Name); + break; case RK_UMin: assert(Opcode == Instruction::ICmp && "Expected integer types."); Cmp = Builder.CreateICmpULT(LHS, RHS); - return Builder.CreateSelect(Cmp, LHS, RHS, Name); + break; case RK_UMax: assert(Opcode == Instruction::ICmp && "Expected integer types."); Cmp = Builder.CreateICmpUGT(LHS, RHS); - return Builder.CreateSelect(Cmp, LHS, RHS, Name); - case RK_None: break; + case RK_None: + llvm_unreachable("Unknown reduction operation."); } - llvm_unreachable("Unknown reduction operation."); + return Builder.CreateSelect(Cmp, LHS, RHS, Name); } public: @@ -6438,9 +6429,6 @@ public: } // Update users. ReductionRoot->replaceAllUsesWith(VectorizedTree); - // Mark all scalar reduction ops for deletion, they are replaced by the - // vector reductions. - V.eraseInstructions(IgnoreList); } return VectorizedTree != nullptr; } @@ -6695,13 +6683,18 @@ static bool tryToVectorizeHorReductionOrInstOperands( // horizontal reduction. // Interrupt the process if the Root instruction itself was vectorized or all // sub-trees not higher that RecursionMaxDepth were analyzed/vectorized. - SmallVector, 8> Stack(1, {Root, 0}); + SmallVector, 8> Stack(1, {Root, 0}); SmallPtrSet VisitedInstrs; bool Res = false; while (!Stack.empty()) { - Instruction *Inst; + Value *V; unsigned Level; - std::tie(Inst, Level) = Stack.pop_back_val(); + std::tie(V, Level) = Stack.pop_back_val(); + if (!V) + continue; + auto *Inst = dyn_cast(V); + if (!Inst) + continue; auto *BI = dyn_cast(Inst); auto *SI = dyn_cast(Inst); if (BI || SI) { @@ -6742,8 +6735,8 @@ static bool tryToVectorizeHorReductionOrInstOperands( for (auto *Op : Inst->operand_values()) if (VisitedInstrs.insert(Op).second) if (auto *I = dyn_cast(Op)) - if (!isa(I) && !R.isDeleted(I) && I->getParent() == BB) - Stack.emplace_back(I, Level); + if (!isa(I) && I->getParent() == BB) + Stack.emplace_back(Op, Level); } return Res; } @@ -6812,10 +6805,11 @@ bool SLPVectorizerPass::vectorizeCmpInst(CmpInst *CI, BasicBlock *BB, } bool SLPVectorizerPass::vectorizeSimpleInstructions( - SmallVectorImpl &Instructions, BasicBlock *BB, BoUpSLP &R) { + SmallVectorImpl &Instructions, BasicBlock *BB, BoUpSLP &R) { bool OpsChanged = false; - for (auto *I : reverse(Instructions)) { - if (R.isDeleted(I)) + for (auto &VH : reverse(Instructions)) { + auto *I = dyn_cast_or_null(VH); + if (!I) continue; if (auto *LastInsertValue = dyn_cast(I)) OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R); @@ -6844,7 +6838,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { if (!P) break; - if (!VisitedInstrs.count(P) && !R.isDeleted(P)) + if (!VisitedInstrs.count(P)) Incoming.push_back(P); } @@ -6888,12 +6882,9 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { VisitedInstrs.clear(); - SmallVector PostProcessInstructions; + SmallVector PostProcessInstructions; SmallDenseSet KeyNodes; for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { - // Skip instructions marked for the deletion. - if (R.isDeleted(&*it)) - continue; // We may go through BB multiple times so skip the one we have checked. if (!VisitedInstrs.insert(&*it).second) { if (it->use_empty() && KeyNodes.count(&*it) > 0 && @@ -6986,10 +6977,10 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) { SetVector Candidates(GEPList.begin(), GEPList.end()); // Some of the candidates may have already been vectorized after we - // initially collected them. If so, they are marked as deleted, so remove - // them from the set of candidates. - Candidates.remove_if( - [&R](Value *I) { return R.isDeleted(cast(I)); }); + // initially collected them. If so, the WeakTrackingVHs will have + // nullified the + // values, so remove them from the set of candidates. + Candidates.remove(nullptr); // Remove from the set of candidates all pairs of getelementptrs with // constant differences. Such getelementptrs are likely not good @@ -6997,18 +6988,18 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) { // computed from the other. We also ensure all candidate getelementptr // indices are unique. for (int I = 0, E = GEPList.size(); I < E && Candidates.size() > 1; ++I) { - auto *GEPI = GEPList[I]; + auto *GEPI = cast(GEPList[I]); if (!Candidates.count(GEPI)) continue; auto *SCEVI = SE->getSCEV(GEPList[I]); for (int J = I + 1; J < E && Candidates.size() > 1; ++J) { - auto *GEPJ = GEPList[J]; + auto *GEPJ = cast(GEPList[J]); auto *SCEVJ = SE->getSCEV(GEPList[J]); if (isa(SE->getMinusSCEV(SCEVI, SCEVJ))) { - Candidates.remove(GEPI); - Candidates.remove(GEPJ); + Candidates.remove(GEPList[I]); + Candidates.remove(GEPList[J]); } else if (GEPI->idx_begin()->get() == GEPJ->idx_begin()->get()) { - Candidates.remove(GEPJ); + Candidates.remove(GEPList[J]); } } } diff --git a/test/Transforms/SLPVectorizer/AArch64/gather-root.ll b/test/Transforms/SLPVectorizer/AArch64/gather-root.ll index 5f8cbd5c8d0..5e6db8b4836 100644 --- a/test/Transforms/SLPVectorizer/AArch64/gather-root.ll +++ b/test/Transforms/SLPVectorizer/AArch64/gather-root.ll @@ -17,8 +17,16 @@ define void @PR28330(i32 %n) { ; DEFAULT: for.body: ; DEFAULT-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32> +; DEFAULT-NEXT: [[P20:%.*]] = add i32 [[P17]], undef +; DEFAULT-NEXT: [[P22:%.*]] = add i32 [[P20]], undef +; DEFAULT-NEXT: [[P24:%.*]] = add i32 [[P22]], undef +; DEFAULT-NEXT: [[P26:%.*]] = add i32 [[P24]], undef +; DEFAULT-NEXT: [[P28:%.*]] = add i32 [[P26]], undef +; DEFAULT-NEXT: [[P30:%.*]] = add i32 [[P28]], undef +; DEFAULT-NEXT: [[P32:%.*]] = add i32 [[P30]], undef ; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) ; DEFAULT-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], [[P17]] +; DEFAULT-NEXT: [[P34:%.*]] = add i32 [[P32]], undef ; DEFAULT-NEXT: br label [[FOR_BODY]] ; ; GATHER-LABEL: @PR28330( @@ -28,30 +36,37 @@ define void @PR28330(i32 %n) { ; GATHER-NEXT: br label [[FOR_BODY:%.*]] ; GATHER: for.body: ; GATHER-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; GATHER-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7 -; GATHER-NEXT: [[TMP3:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0 -; GATHER-NEXT: [[TMP4:%.*]] = insertelement <8 x i1> undef, i1 [[TMP3]], i32 0 -; GATHER-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1 -; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i1> [[TMP4]], i1 [[TMP5]], i32 1 -; GATHER-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2 -; GATHER-NEXT: [[TMP8:%.*]] = insertelement <8 x i1> [[TMP6]], i1 [[TMP7]], i32 2 -; GATHER-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3 -; GATHER-NEXT: [[TMP10:%.*]] = insertelement <8 x i1> [[TMP8]], i1 [[TMP9]], i32 3 -; GATHER-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4 -; GATHER-NEXT: [[TMP12:%.*]] = insertelement <8 x i1> [[TMP10]], i1 [[TMP11]], i32 4 -; GATHER-NEXT: [[TMP13:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5 -; GATHER-NEXT: [[TMP14:%.*]] = insertelement <8 x i1> [[TMP12]], i1 [[TMP13]], i32 5 -; GATHER-NEXT: [[TMP15:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6 -; GATHER-NEXT: [[TMP16:%.*]] = insertelement <8 x i1> [[TMP14]], i1 [[TMP15]], i32 6 -; GATHER-NEXT: [[TMP17:%.*]] = insertelement <8 x i1> [[TMP16]], i1 [[TMP2]], i32 7 +; GATHER-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0 +; GATHER-NEXT: [[TMP3:%.*]] = insertelement <8 x i1> undef, i1 [[TMP2]], i32 0 +; GATHER-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1 +; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i1> [[TMP3]], i1 [[TMP4]], i32 1 +; GATHER-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2 +; GATHER-NEXT: [[TMP7:%.*]] = insertelement <8 x i1> [[TMP5]], i1 [[TMP6]], i32 2 +; GATHER-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3 +; GATHER-NEXT: [[TMP9:%.*]] = insertelement <8 x i1> [[TMP7]], i1 [[TMP8]], i32 3 +; GATHER-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4 +; GATHER-NEXT: [[TMP11:%.*]] = insertelement <8 x i1> [[TMP9]], i1 [[TMP10]], i32 4 +; GATHER-NEXT: [[TMP12:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5 +; GATHER-NEXT: [[TMP13:%.*]] = insertelement <8 x i1> [[TMP11]], i1 [[TMP12]], i32 5 +; GATHER-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6 +; GATHER-NEXT: [[TMP15:%.*]] = insertelement <8 x i1> [[TMP13]], i1 [[TMP14]], i32 6 +; GATHER-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7 +; GATHER-NEXT: [[TMP17:%.*]] = insertelement <8 x i1> [[TMP15]], i1 [[TMP16]], i32 7 ; GATHER-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i32> , <8 x i32> ; GATHER-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP18]], i32 0 +; GATHER-NEXT: [[P20:%.*]] = add i32 [[P17]], [[TMP19]] ; GATHER-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP18]], i32 1 +; GATHER-NEXT: [[P22:%.*]] = add i32 [[P20]], [[TMP20]] ; GATHER-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP18]], i32 2 +; GATHER-NEXT: [[P24:%.*]] = add i32 [[P22]], [[TMP21]] ; GATHER-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP18]], i32 3 +; GATHER-NEXT: [[P26:%.*]] = add i32 [[P24]], [[TMP22]] ; GATHER-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP18]], i32 4 +; GATHER-NEXT: [[P28:%.*]] = add i32 [[P26]], [[TMP23]] ; GATHER-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP18]], i32 5 +; GATHER-NEXT: [[P30:%.*]] = add i32 [[P28]], [[TMP24]] ; GATHER-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP18]], i32 6 +; GATHER-NEXT: [[P32:%.*]] = add i32 [[P30]], [[TMP25]] ; GATHER-NEXT: [[TMP26:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0 ; GATHER-NEXT: [[TMP27:%.*]] = insertelement <8 x i32> [[TMP26]], i32 [[TMP20]], i32 1 ; GATHER-NEXT: [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32 [[TMP21]], i32 2 @@ -63,6 +78,7 @@ define void @PR28330(i32 %n) { ; GATHER-NEXT: [[TMP34:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP33]], i32 7 ; GATHER-NEXT: [[TMP35:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP34]]) ; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP35]], [[P17]] +; GATHER-NEXT: [[P34:%.*]] = add i32 [[P32]], [[TMP33]] ; GATHER-NEXT: br label [[FOR_BODY]] ; ; MAX-COST-LABEL: @PR28330( @@ -153,8 +169,16 @@ define void @PR32038(i32 %n) { ; DEFAULT: for.body: ; DEFAULT-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32> +; DEFAULT-NEXT: [[P20:%.*]] = add i32 -5, undef +; DEFAULT-NEXT: [[P22:%.*]] = add i32 [[P20]], undef +; DEFAULT-NEXT: [[P24:%.*]] = add i32 [[P22]], undef +; DEFAULT-NEXT: [[P26:%.*]] = add i32 [[P24]], undef +; DEFAULT-NEXT: [[P28:%.*]] = add i32 [[P26]], undef +; DEFAULT-NEXT: [[P30:%.*]] = add i32 [[P28]], undef +; DEFAULT-NEXT: [[P32:%.*]] = add i32 [[P30]], undef ; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) ; DEFAULT-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], -5 +; DEFAULT-NEXT: [[P34:%.*]] = add i32 [[P32]], undef ; DEFAULT-NEXT: br label [[FOR_BODY]] ; ; GATHER-LABEL: @PR32038( @@ -164,30 +188,37 @@ define void @PR32038(i32 %n) { ; GATHER-NEXT: br label [[FOR_BODY:%.*]] ; GATHER: for.body: ; GATHER-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; GATHER-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7 -; GATHER-NEXT: [[TMP3:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0 -; GATHER-NEXT: [[TMP4:%.*]] = insertelement <8 x i1> undef, i1 [[TMP3]], i32 0 -; GATHER-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1 -; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i1> [[TMP4]], i1 [[TMP5]], i32 1 -; GATHER-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2 -; GATHER-NEXT: [[TMP8:%.*]] = insertelement <8 x i1> [[TMP6]], i1 [[TMP7]], i32 2 -; GATHER-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3 -; GATHER-NEXT: [[TMP10:%.*]] = insertelement <8 x i1> [[TMP8]], i1 [[TMP9]], i32 3 -; GATHER-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4 -; GATHER-NEXT: [[TMP12:%.*]] = insertelement <8 x i1> [[TMP10]], i1 [[TMP11]], i32 4 -; GATHER-NEXT: [[TMP13:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5 -; GATHER-NEXT: [[TMP14:%.*]] = insertelement <8 x i1> [[TMP12]], i1 [[TMP13]], i32 5 -; GATHER-NEXT: [[TMP15:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6 -; GATHER-NEXT: [[TMP16:%.*]] = insertelement <8 x i1> [[TMP14]], i1 [[TMP15]], i32 6 -; GATHER-NEXT: [[TMP17:%.*]] = insertelement <8 x i1> [[TMP16]], i1 [[TMP2]], i32 7 +; GATHER-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0 +; GATHER-NEXT: [[TMP3:%.*]] = insertelement <8 x i1> undef, i1 [[TMP2]], i32 0 +; GATHER-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1 +; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i1> [[TMP3]], i1 [[TMP4]], i32 1 +; GATHER-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2 +; GATHER-NEXT: [[TMP7:%.*]] = insertelement <8 x i1> [[TMP5]], i1 [[TMP6]], i32 2 +; GATHER-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3 +; GATHER-NEXT: [[TMP9:%.*]] = insertelement <8 x i1> [[TMP7]], i1 [[TMP8]], i32 3 +; GATHER-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4 +; GATHER-NEXT: [[TMP11:%.*]] = insertelement <8 x i1> [[TMP9]], i1 [[TMP10]], i32 4 +; GATHER-NEXT: [[TMP12:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5 +; GATHER-NEXT: [[TMP13:%.*]] = insertelement <8 x i1> [[TMP11]], i1 [[TMP12]], i32 5 +; GATHER-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6 +; GATHER-NEXT: [[TMP15:%.*]] = insertelement <8 x i1> [[TMP13]], i1 [[TMP14]], i32 6 +; GATHER-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7 +; GATHER-NEXT: [[TMP17:%.*]] = insertelement <8 x i1> [[TMP15]], i1 [[TMP16]], i32 7 ; GATHER-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i32> , <8 x i32> ; GATHER-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP18]], i32 0 +; GATHER-NEXT: [[P20:%.*]] = add i32 -5, [[TMP19]] ; GATHER-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP18]], i32 1 +; GATHER-NEXT: [[P22:%.*]] = add i32 [[P20]], [[TMP20]] ; GATHER-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP18]], i32 2 +; GATHER-NEXT: [[P24:%.*]] = add i32 [[P22]], [[TMP21]] ; GATHER-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP18]], i32 3 +; GATHER-NEXT: [[P26:%.*]] = add i32 [[P24]], [[TMP22]] ; GATHER-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP18]], i32 4 +; GATHER-NEXT: [[P28:%.*]] = add i32 [[P26]], [[TMP23]] ; GATHER-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP18]], i32 5 +; GATHER-NEXT: [[P30:%.*]] = add i32 [[P28]], [[TMP24]] ; GATHER-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP18]], i32 6 +; GATHER-NEXT: [[P32:%.*]] = add i32 [[P30]], [[TMP25]] ; GATHER-NEXT: [[TMP26:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0 ; GATHER-NEXT: [[TMP27:%.*]] = insertelement <8 x i32> [[TMP26]], i32 [[TMP20]], i32 1 ; GATHER-NEXT: [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32 [[TMP21]], i32 2 @@ -199,6 +230,7 @@ define void @PR32038(i32 %n) { ; GATHER-NEXT: [[TMP34:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP33]], i32 7 ; GATHER-NEXT: [[TMP35:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP34]]) ; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP35]], -5 +; GATHER-NEXT: [[P34:%.*]] = add i32 [[P32]], [[TMP33]] ; GATHER-NEXT: br label [[FOR_BODY]] ; ; MAX-COST-LABEL: @PR32038( @@ -227,12 +259,18 @@ define void @PR32038(i32 %n) { ; MAX-COST-NEXT: [[TMP6:%.*]] = insertelement <4 x i1> [[TMP5]], i1 [[P5]], i32 2 ; MAX-COST-NEXT: [[TMP7:%.*]] = insertelement <4 x i1> [[TMP6]], i1 [[P7]], i32 3 ; MAX-COST-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> , <4 x i32> +; MAX-COST-NEXT: [[P20:%.*]] = add i32 -5, undef +; MAX-COST-NEXT: [[P22:%.*]] = add i32 [[P20]], undef +; MAX-COST-NEXT: [[P24:%.*]] = add i32 [[P22]], undef +; MAX-COST-NEXT: [[P26:%.*]] = add i32 [[P24]], undef ; MAX-COST-NEXT: [[P27:%.*]] = select i1 [[P9]], i32 -720, i32 -80 +; MAX-COST-NEXT: [[P28:%.*]] = add i32 [[P26]], [[P27]] ; MAX-COST-NEXT: [[P29:%.*]] = select i1 [[P11]], i32 -720, i32 -80 ; MAX-COST-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP8]]) ; MAX-COST-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[P27]] ; MAX-COST-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], [[P29]] ; MAX-COST-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP11]], -5 +; MAX-COST-NEXT: [[P30:%.*]] = add i32 [[P28]], [[P29]] ; MAX-COST-NEXT: [[P31:%.*]] = select i1 [[P13]], i32 -720, i32 -80 ; MAX-COST-NEXT: [[P32:%.*]] = add i32 [[OP_EXTRA]], [[P31]] ; MAX-COST-NEXT: [[P33:%.*]] = select i1 [[P15]], i32 -720, i32 -80 diff --git a/test/Transforms/SLPVectorizer/AArch64/horizontal.ll b/test/Transforms/SLPVectorizer/AArch64/horizontal.ll index b3ce90dabd8..7e4b95d7234 100644 --- a/test/Transforms/SLPVectorizer/AArch64/horizontal.ll +++ b/test/Transforms/SLPVectorizer/AArch64/horizontal.ll @@ -46,8 +46,12 @@ define i32 @test_select(i32* noalias nocapture readonly %blk1, i32* noalias noca ; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP4]] +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, [[S_026]] +; CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD]], undef +; CHECK-NEXT: [[ADD19:%.*]] = add nsw i32 [[ADD11]], undef ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP7]]) ; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP8]], [[S_026]] +; CHECK-NEXT: [[ADD27:%.*]] = add nsw i32 [[ADD19]], undef ; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i32, i32* [[P1_023]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[ADD_PTR29]] = getelementptr inbounds i32, i32* [[P2_024]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[J_025]], 1 @@ -169,8 +173,12 @@ define i32 @reduction_with_br(i32* noalias nocapture readonly %blk1, i32* noalia ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[P2_018]] to <4 x i32>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, [[S_020]] +; CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD]], undef +; CHECK-NEXT: [[ADD9:%.*]] = add nsw i32 [[ADD5]], undef ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP5]], [[S_020]] +; CHECK-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD9]], undef ; CHECK-NEXT: [[CMP14:%.*]] = icmp slt i32 [[OP_EXTRA]], [[LIM:%.*]] ; CHECK-NEXT: br i1 [[CMP14]], label [[IF_END]], label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: if.end: @@ -285,8 +293,16 @@ define i32 @test_unrolled_select(i8* noalias nocapture readonly %blk1, i8* noali ; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <8 x i32> [[TMP6]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = sub nsw <8 x i32> zeroinitializer, [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP7]], <8 x i32> [[TMP8]], <8 x i32> [[TMP6]] +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, [[S_047]] +; CHECK-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD]], undef +; CHECK-NEXT: [[ADD27:%.*]] = add nsw i32 [[ADD16]], undef +; CHECK-NEXT: [[ADD38:%.*]] = add nsw i32 [[ADD27]], undef +; CHECK-NEXT: [[ADD49:%.*]] = add nsw i32 [[ADD38]], undef +; CHECK-NEXT: [[ADD60:%.*]] = add nsw i32 [[ADD49]], undef +; CHECK-NEXT: [[ADD71:%.*]] = add nsw i32 [[ADD60]], undef ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP9]]) ; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP10]], [[S_047]] +; CHECK-NEXT: [[ADD82:%.*]] = add nsw i32 [[ADD71]], undef ; CHECK-NEXT: [[CMP83:%.*]] = icmp slt i32 [[OP_EXTRA]], [[LIM:%.*]] ; CHECK-NEXT: br i1 [[CMP83]], label [[IF_END_86]], label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: if.end.86: diff --git a/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll b/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll index 90771e4eb31..98a9fd482e8 100644 --- a/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll +++ b/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll @@ -13,11 +13,11 @@ define void @patatino(i64 %n, i64 %i, %struct.S* %p) !dbg !7 { ; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[I:%.*]], metadata !19, metadata !DIExpression()), !dbg !24 ; CHECK-NEXT: call void @llvm.dbg.value(metadata %struct.S* [[P:%.*]], metadata !20, metadata !DIExpression()), !dbg !25 ; CHECK-NEXT: [[X1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[P]], i64 [[N]], i32 0, !dbg !26 -; CHECK-NEXT: call void @llvm.dbg.value(metadata !2, metadata !21, metadata !DIExpression()), !dbg !27 +; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 undef, metadata !21, metadata !DIExpression()), !dbg !27 ; CHECK-NEXT: [[Y3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[N]], i32 1, !dbg !28 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[X1]] to <2 x i64>*, !dbg !26 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8, !dbg !26, !tbaa !29 -; CHECK-NEXT: call void @llvm.dbg.value(metadata !2, metadata !22, metadata !DIExpression()), !dbg !33 +; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 undef, metadata !22, metadata !DIExpression()), !dbg !33 ; CHECK-NEXT: [[X5:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[I]], i32 0, !dbg !34 ; CHECK-NEXT: [[Y7:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[I]], i32 1, !dbg !35 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[X5]] to <2 x i64>*, !dbg !36 diff --git a/test/Transforms/SLPVectorizer/X86/PR31847.ll b/test/Transforms/SLPVectorizer/X86/PR31847.ll deleted file mode 100644 index 2fe342430de..00000000000 --- a/test/Transforms/SLPVectorizer/X86/PR31847.ll +++ /dev/null @@ -1,153 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -slp-vectorizer -S -o - -mtriple=i386 -mcpu=haswell < %s | FileCheck %s -target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" - -@shift = common local_unnamed_addr global [10 x i32] zeroinitializer, align 4 -@data = common local_unnamed_addr global [10 x i8*] zeroinitializer, align 4 - -define void @flat(i32 %intensity) { -; CHECK-LABEL: @flat( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 0), align 4 -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 1), align 4 -; CHECK-NEXT: [[TMP2:%.*]] = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 0), align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 1), align 4 -; CHECK-NEXT: [[SHR:%.*]] = lshr i32 1, [[TMP0]] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 [[SHR]] -; CHECK-NEXT: [[SHR1:%.*]] = lshr i32 1, [[TMP1]] -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP3]], i32 [[SHR1]] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: [[D1_DATA_046:%.*]] = phi i8* [ [[TMP3]], [[ENTRY:%.*]] ], [ [[ADD_PTR23_1:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[Y_045:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_1:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP4]] to i32 -; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[CONV]], -128 -; CHECK-NEXT: [[TMP5:%.*]] = load i8, i8* [[ARRAYIDX2]], align 1 -; CHECK-NEXT: [[CONV3:%.*]] = zext i8 [[TMP5]] to i32 -; CHECK-NEXT: [[SUB4:%.*]] = add nsw i32 [[CONV3]], -128 -; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[SUB]], -1 -; CHECK-NEXT: [[SUB7:%.*]] = sub nsw i32 128, [[CONV]] -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP5]], i32 [[SUB]], i32 [[SUB7]] -; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[SUB4]], -1 -; CHECK-NEXT: [[SUB12:%.*]] = sub nsw i32 128, [[CONV3]] -; CHECK-NEXT: [[COND14:%.*]] = select i1 [[CMP8]], i32 [[SUB4]], i32 [[SUB12]] -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[COND14]], [[COND]] -; CHECK-NEXT: [[IDX_NEG:%.*]] = sub nsw i32 0, [[ADD]] -; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[IDX_NEG]] -; CHECK-NEXT: [[TMP6:%.*]] = load i8, i8* [[ADD_PTR]], align 1 -; CHECK-NEXT: [[CONV15:%.*]] = zext i8 [[TMP6]] to i32 -; CHECK-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], [[INTENSITY:%.*]] -; CHECK-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -; CHECK-NEXT: store i8 [[CONV17]], i8* [[ADD_PTR]], align 1 -; CHECK-NEXT: [[ADD_PTR18:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[ADD]] -; CHECK-NEXT: [[TMP7:%.*]] = load i8, i8* [[ADD_PTR18]], align 1 -; CHECK-NEXT: [[NOT_TOBOOL:%.*]] = icmp eq i8 [[TMP7]], 0 -; CHECK-NEXT: [[CONV21:%.*]] = zext i1 [[NOT_TOBOOL]] to i8 -; CHECK-NEXT: store i8 [[CONV21]], i8* [[ADD_PTR18]], align 1 -; CHECK-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[TMP1]] -; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CONV_1:%.*]] = zext i8 [[TMP8]] to i32 -; CHECK-NEXT: [[SUB_1:%.*]] = add nsw i32 [[CONV_1]], -128 -; CHECK-NEXT: [[TMP9:%.*]] = load i8, i8* [[ARRAYIDX2]], align 1 -; CHECK-NEXT: [[CONV3_1:%.*]] = zext i8 [[TMP9]] to i32 -; CHECK-NEXT: [[SUB4_1:%.*]] = add nsw i32 [[CONV3_1]], -128 -; CHECK-NEXT: [[CMP5_1:%.*]] = icmp sgt i32 [[SUB_1]], -1 -; CHECK-NEXT: [[SUB7_1:%.*]] = sub nsw i32 128, [[CONV_1]] -; CHECK-NEXT: [[COND_1:%.*]] = select i1 [[CMP5_1]], i32 [[SUB_1]], i32 [[SUB7_1]] -; CHECK-NEXT: [[CMP8_1:%.*]] = icmp sgt i32 [[SUB4_1]], -1 -; CHECK-NEXT: [[SUB12_1:%.*]] = sub nsw i32 128, [[CONV3_1]] -; CHECK-NEXT: [[COND14_1:%.*]] = select i1 [[CMP8_1]], i32 [[SUB4_1]], i32 [[SUB12_1]] -; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[COND14_1]], [[COND_1]] -; CHECK-NEXT: [[IDX_NEG_1:%.*]] = sub nsw i32 0, [[ADD_1]] -; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[IDX_NEG_1]] -; CHECK-NEXT: [[TMP10:%.*]] = load i8, i8* [[ADD_PTR_1]], align 1 -; CHECK-NEXT: [[CONV15_1:%.*]] = zext i8 [[TMP10]] to i32 -; CHECK-NEXT: [[ADD16_1:%.*]] = add nsw i32 [[CONV15_1]], [[INTENSITY]] -; CHECK-NEXT: [[CONV17_1:%.*]] = trunc i32 [[ADD16_1]] to i8 -; CHECK-NEXT: store i8 [[CONV17_1]], i8* [[ADD_PTR_1]], align 1 -; CHECK-NEXT: [[ADD_PTR18_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[ADD_1]] -; CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[ADD_PTR18_1]], align 1 -; CHECK-NEXT: [[NOT_TOBOOL_1:%.*]] = icmp eq i8 [[TMP11]], 0 -; CHECK-NEXT: [[CONV21_1:%.*]] = zext i1 [[NOT_TOBOOL_1]] to i8 -; CHECK-NEXT: store i8 [[CONV21_1]], i8* [[ADD_PTR18_1]], align 1 -; CHECK-NEXT: [[ADD_PTR23_1]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[TMP1]] -; CHECK-NEXT: [[INC_1]] = add nsw i32 [[Y_045]], 2 -; CHECK-NEXT: [[EXITCOND_1:%.*]] = icmp eq i32 [[INC_1]], 128 -; CHECK-NEXT: br i1 [[EXITCOND_1]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] -; -entry: - %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 0), align 4 - %1 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 1), align 4 - %2 = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 0), align 4 - %3 = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 1), align 4 - %shr = lshr i32 1, %0 - %arrayidx = getelementptr inbounds i8, i8* %2, i32 %shr - %shr1 = lshr i32 1, %1 - %arrayidx2 = getelementptr inbounds i8, i8* %3, i32 %shr1 - br label %for.body - -for.cond.cleanup: ; preds = %for.body - ret void - -for.body: ; preds = %for.body, %entry - %d1_data.046 = phi i8* [ %3, %entry ], [ %add.ptr23.1, %for.body ] - %y.045 = phi i32 [ 0, %entry ], [ %inc.1, %for.body ] - %4 = load i8, i8* %arrayidx, align 1 - %conv = zext i8 %4 to i32 - %sub = add nsw i32 %conv, -128 - %5 = load i8, i8* %arrayidx2, align 1 - %conv3 = zext i8 %5 to i32 - %sub4 = add nsw i32 %conv3, -128 - %cmp5 = icmp sgt i32 %sub, -1 - %sub7 = sub nsw i32 128, %conv - %cond = select i1 %cmp5, i32 %sub, i32 %sub7 - %cmp8 = icmp sgt i32 %sub4, -1 - %sub12 = sub nsw i32 128, %conv3 - %cond14 = select i1 %cmp8, i32 %sub4, i32 %sub12 - %add = add nsw i32 %cond14, %cond - %idx.neg = sub nsw i32 0, %add - %add.ptr = getelementptr inbounds i8, i8* %d1_data.046, i32 %idx.neg - %6 = load i8, i8* %add.ptr, align 1 - %conv15 = zext i8 %6 to i32 - %add16 = add nsw i32 %conv15, %intensity - %conv17 = trunc i32 %add16 to i8 - store i8 %conv17, i8* %add.ptr, align 1 - %add.ptr18 = getelementptr inbounds i8, i8* %d1_data.046, i32 %add - %7 = load i8, i8* %add.ptr18, align 1 - %not.tobool = icmp eq i8 %7, 0 - %conv21 = zext i1 %not.tobool to i8 - store i8 %conv21, i8* %add.ptr18, align 1 - %add.ptr23 = getelementptr inbounds i8, i8* %d1_data.046, i32 %1 - %8 = load i8, i8* %arrayidx, align 1 - %conv.1 = zext i8 %8 to i32 - %sub.1 = add nsw i32 %conv.1, -128 - %9 = load i8, i8* %arrayidx2, align 1 - %conv3.1 = zext i8 %9 to i32 - %sub4.1 = add nsw i32 %conv3.1, -128 - %cmp5.1 = icmp sgt i32 %sub.1, -1 - %sub7.1 = sub nsw i32 128, %conv.1 - %cond.1 = select i1 %cmp5.1, i32 %sub.1, i32 %sub7.1 - %cmp8.1 = icmp sgt i32 %sub4.1, -1 - %sub12.1 = sub nsw i32 128, %conv3.1 - %cond14.1 = select i1 %cmp8.1, i32 %sub4.1, i32 %sub12.1 - %add.1 = add nsw i32 %cond14.1, %cond.1 - %idx.neg.1 = sub nsw i32 0, %add.1 - %add.ptr.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %idx.neg.1 - %10 = load i8, i8* %add.ptr.1, align 1 - %conv15.1 = zext i8 %10 to i32 - %add16.1 = add nsw i32 %conv15.1, %intensity - %conv17.1 = trunc i32 %add16.1 to i8 - store i8 %conv17.1, i8* %add.ptr.1, align 1 - %add.ptr18.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %add.1 - %11 = load i8, i8* %add.ptr18.1, align 1 - %not.tobool.1 = icmp eq i8 %11, 0 - %conv21.1 = zext i1 %not.tobool.1 to i8 - store i8 %conv21.1, i8* %add.ptr18.1, align 1 - %add.ptr23.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %1 - %inc.1 = add nsw i32 %y.045, 2 - %exitcond.1 = icmp eq i32 %inc.1, 128 - br i1 %exitcond.1, label %for.cond.cleanup, label %for.body -} diff --git a/test/Transforms/SLPVectorizer/X86/PR35628_1.ll b/test/Transforms/SLPVectorizer/X86/PR35628_1.ll index e033ed768ca..625748f6820 100644 --- a/test/Transforms/SLPVectorizer/X86/PR35628_1.ll +++ b/test/Transforms/SLPVectorizer/X86/PR35628_1.ll @@ -18,16 +18,23 @@ define void @mainTest(i32* %ptr) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i32> [[TMP4]], [[TMP4]] -; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP6]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = add i32 1, undef +; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], undef +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], [[TMP6]] +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], undef +; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP6]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP13]], [[TMP5]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP8]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]] -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0 -; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP10]], 1 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0 +; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP16]], 1 ; CHECK-NEXT: [[OP_EXTRA3:%.*]] = add i32 [[OP_EXTRA]], [[TMP7]] ; CHECK-NEXT: [[OP_EXTRA4:%.*]] = add i32 [[OP_EXTRA3]], [[TMP6]] ; CHECK-NEXT: [[OP_EXTRA5]] = add i32 [[OP_EXTRA4]], [[TMP5]] +; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP15]], undef ; CHECK-NEXT: br label [[LOOP]] ; CHECK: bail_out: ; CHECK-NEXT: ret void diff --git a/test/Transforms/SLPVectorizer/X86/PR35628_2.ll b/test/Transforms/SLPVectorizer/X86/PR35628_2.ll index 4916d283bf3..712ff040a91 100644 --- a/test/Transforms/SLPVectorizer/X86/PR35628_2.ll +++ b/test/Transforms/SLPVectorizer/X86/PR35628_2.ll @@ -20,6 +20,10 @@ define void @test() #0 { ; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP7]], 32 ; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i64> , [[TMP5]] ; CHECK-NEXT: [[TMP9:%.*]] = ashr exact <4 x i64> [[TMP8]], +; CHECK-NEXT: [[SUM1:%.*]] = add i64 undef, undef +; CHECK-NEXT: [[SUM2:%.*]] = add i64 [[SUM1]], undef +; CHECK-NEXT: [[ZSUM:%.*]] = add i64 [[SUM2]], 0 +; CHECK-NEXT: [[JOIN:%.*]] = add i64 [[TMP6]], [[ZSUM]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i64> [[TMP9]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]], <4 x i64> undef, <4 x i32> @@ -27,6 +31,7 @@ define void @test() #0 { ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[BIN_RDX2]], i32 0 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i64 [[TMP10]], 0 ; CHECK-NEXT: [[OP_EXTRA3]] = add i64 [[OP_EXTRA]], [[TMP6]] +; CHECK-NEXT: [[LAST:%.*]] = add i64 [[JOIN]], undef ; CHECK-NEXT: br label [[LOOP]] ; entry: diff --git a/test/Transforms/SLPVectorizer/X86/PR39774.ll b/test/Transforms/SLPVectorizer/X86/PR39774.ll index db02d64ba93..24f75b32c5d 100644 --- a/test/Transforms/SLPVectorizer/X86/PR39774.ll +++ b/test/Transforms/SLPVectorizer/X86/PR39774.ll @@ -11,6 +11,40 @@ define void @Test(i32) { ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef +; CHECK-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]] +; CHECK-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]] +; CHECK-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]] +; CHECK-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]] +; CHECK-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], undef +; CHECK-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]] +; CHECK-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]] +; CHECK-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]] +; CHECK-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], undef +; CHECK-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]] +; CHECK-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]] +; CHECK-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]] +; CHECK-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]] +; CHECK-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]] +; CHECK-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], undef +; CHECK-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], undef +; CHECK-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]] +; CHECK-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]] +; CHECK-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]] +; CHECK-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]] +; CHECK-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]] +; CHECK-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]] +; CHECK-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]] +; CHECK-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]] +; CHECK-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]] +; CHECK-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]] +; CHECK-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]] +; CHECK-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]] +; CHECK-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], undef +; CHECK-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]] +; CHECK-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]] +; CHECK-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]] +; CHECK-NEXT: [[VAL_40:%.*]] = and i32 [[VAL_38]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = and <8 x i32> [[TMP3]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> @@ -18,7 +52,7 @@ define void @Test(i32) { ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 -; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP4]], [[TMP0:%.*]] +; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP4]], [[TMP0]] ; CHECK-NEXT: [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]] ; CHECK-NEXT: [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]] ; CHECK-NEXT: [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]] @@ -45,6 +79,7 @@ define void @Test(i32) { ; CHECK-NEXT: [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]] ; CHECK-NEXT: [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP0]] ; CHECK-NEXT: [[OP_EXTRA30:%.*]] = and i32 [[OP_EXTRA29]], [[TMP0]] +; CHECK-NEXT: [[VAL_42:%.*]] = and i32 [[VAL_40]], undef ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> undef, i32 [[OP_EXTRA30]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 14910, i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0 @@ -66,8 +101,40 @@ define void @Test(i32) { ; FORCE_REDUCTION-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <4 x i32> ; FORCE_REDUCTION-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1 ; FORCE_REDUCTION-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHUFFLE]], +; FORCE_REDUCTION-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef +; FORCE_REDUCTION-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]] +; FORCE_REDUCTION-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], undef +; FORCE_REDUCTION-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], undef +; FORCE_REDUCTION-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], undef ; FORCE_REDUCTION-NEXT: [[VAL_20:%.*]] = add i32 [[TMP2]], 1496 +; FORCE_REDUCTION-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], [[VAL_20]] +; FORCE_REDUCTION-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]] ; FORCE_REDUCTION-NEXT: [[VAL_34:%.*]] = add i32 [[TMP2]], 8555 +; FORCE_REDUCTION-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], [[VAL_34]] +; FORCE_REDUCTION-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]] ; FORCE_REDUCTION-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> undef, <4 x i32> ; FORCE_REDUCTION-NEXT: [[BIN_RDX:%.*]] = and <4 x i32> [[TMP3]], [[RDX_SHUF]] ; FORCE_REDUCTION-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> @@ -75,7 +142,7 @@ define void @Test(i32) { ; FORCE_REDUCTION-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0 ; FORCE_REDUCTION-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], [[VAL_20]] ; FORCE_REDUCTION-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], [[VAL_34]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP6]], [[TMP0:%.*]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP6]], [[TMP0]] ; FORCE_REDUCTION-NEXT: [[OP_EXTRA3:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]] ; FORCE_REDUCTION-NEXT: [[OP_EXTRA4:%.*]] = and i32 [[OP_EXTRA3]], [[TMP0]] ; FORCE_REDUCTION-NEXT: [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA4]], [[TMP0]] @@ -103,6 +170,7 @@ define void @Test(i32) { ; FORCE_REDUCTION-NEXT: [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]], [[TMP0]] ; FORCE_REDUCTION-NEXT: [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]] ; FORCE_REDUCTION-NEXT: [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP2]] +; FORCE_REDUCTION-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]] ; FORCE_REDUCTION-NEXT: [[VAL_39:%.*]] = add i32 [[TMP2]], 12529 ; FORCE_REDUCTION-NEXT: [[VAL_40:%.*]] = and i32 [[OP_EXTRA29]], [[VAL_39]] ; FORCE_REDUCTION-NEXT: [[VAL_41:%.*]] = add i32 [[TMP2]], 13685 diff --git a/test/Transforms/SLPVectorizer/X86/PR40310.ll b/test/Transforms/SLPVectorizer/X86/PR40310.ll index 4622c77e8c8..2a0b66ee281 100644 --- a/test/Transforms/SLPVectorizer/X86/PR40310.ll +++ b/test/Transforms/SLPVectorizer/X86/PR40310.ll @@ -13,6 +13,21 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) { ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 15 ; CHECK-NEXT: store atomic i32 [[TMP3]], i32* [[VALS:%.*]] unordered, align 4 ; CHECK-NEXT: [[TMP4:%.*]] = add <16 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[V14:%.*]] = and i32 [[TMP2]], undef +; CHECK-NEXT: [[V16:%.*]] = and i32 undef, [[V14]] +; CHECK-NEXT: [[V18:%.*]] = and i32 undef, [[V16]] +; CHECK-NEXT: [[V20:%.*]] = and i32 undef, [[V18]] +; CHECK-NEXT: [[V22:%.*]] = and i32 undef, [[V20]] +; CHECK-NEXT: [[V24:%.*]] = and i32 undef, [[V22]] +; CHECK-NEXT: [[V26:%.*]] = and i32 undef, [[V24]] +; CHECK-NEXT: [[V28:%.*]] = and i32 undef, [[V26]] +; CHECK-NEXT: [[V30:%.*]] = and i32 undef, [[V28]] +; CHECK-NEXT: [[V32:%.*]] = and i32 undef, [[V30]] +; CHECK-NEXT: [[V34:%.*]] = and i32 undef, [[V32]] +; CHECK-NEXT: [[V36:%.*]] = and i32 undef, [[V34]] +; CHECK-NEXT: [[V38:%.*]] = and i32 undef, [[V36]] +; CHECK-NEXT: [[V40:%.*]] = and i32 undef, [[V38]] +; CHECK-NEXT: [[V42:%.*]] = and i32 undef, [[V40]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> undef, <16 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = and <16 x i32> [[TMP4]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[BIN_RDX]], <16 x i32> undef, <16 x i32> @@ -23,6 +38,7 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) { ; CHECK-NEXT: [[BIN_RDX6:%.*]] = and <16 x i32> [[BIN_RDX4]], [[RDX_SHUF5]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x i32> [[BIN_RDX6]], i32 0 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP5]], [[TMP2]] +; CHECK-NEXT: [[V43:%.*]] = and i32 undef, [[V42]] ; CHECK-NEXT: [[V44:%.*]] = add i32 [[TMP2]], 16 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> undef, i32 [[V44]], i32 0 ; CHECK-NEXT: [[TMP7]] = insertelement <2 x i32> [[TMP6]], i32 [[OP_EXTRA]], i32 1 diff --git a/test/Transforms/SLPVectorizer/X86/bad-reduction.ll b/test/Transforms/SLPVectorizer/X86/bad-reduction.ll index e3452e194db..8637df4acc1 100644 --- a/test/Transforms/SLPVectorizer/X86/bad-reduction.ll +++ b/test/Transforms/SLPVectorizer/X86/bad-reduction.ll @@ -30,6 +30,12 @@ define i64 @load_bswap(%v8i8* %p) { ; CHECK-NEXT: [[SH4:%.*]] = shl nuw nsw i64 [[Z4]], 24 ; CHECK-NEXT: [[SH5:%.*]] = shl nuw nsw i64 [[Z5]], 16 ; CHECK-NEXT: [[SH6:%.*]] = shl nuw nsw i64 [[Z6]], 8 +; CHECK-NEXT: [[OR01:%.*]] = or i64 undef, undef +; CHECK-NEXT: [[OR012:%.*]] = or i64 [[OR01]], undef +; CHECK-NEXT: [[OR0123:%.*]] = or i64 [[OR012]], undef +; CHECK-NEXT: [[OR01234:%.*]] = or i64 [[OR0123]], [[SH4]] +; CHECK-NEXT: [[OR012345:%.*]] = or i64 [[OR01234]], [[SH5]] +; CHECK-NEXT: [[OR0123456:%.*]] = or i64 [[OR012345]], [[SH6]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <4 x i64> [[TMP4]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]], <4 x i64> undef, <4 x i32> @@ -39,6 +45,7 @@ define i64 @load_bswap(%v8i8* %p) { ; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], [[SH5]] ; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP7]], [[SH6]] ; CHECK-NEXT: [[OP_EXTRA:%.*]] = or i64 [[TMP8]], [[Z7]] +; CHECK-NEXT: [[OR01234567:%.*]] = or i64 [[OR0123456]], [[Z7]] ; CHECK-NEXT: ret i64 [[OP_EXTRA]] ; %g0 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 0 @@ -101,6 +108,12 @@ define i64 @load_bswap_nop_shift(%v8i8* %p) { ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 ; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw <8 x i64> [[TMP3]], +; CHECK-NEXT: [[OR01:%.*]] = or i64 undef, undef +; CHECK-NEXT: [[OR012:%.*]] = or i64 [[OR01]], undef +; CHECK-NEXT: [[OR0123:%.*]] = or i64 [[OR012]], undef +; CHECK-NEXT: [[OR01234:%.*]] = or i64 [[OR0123]], undef +; CHECK-NEXT: [[OR012345:%.*]] = or i64 [[OR01234]], undef +; CHECK-NEXT: [[OR0123456:%.*]] = or i64 [[OR012345]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <8 x i64> [[TMP4]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i64> [[BIN_RDX]], <8 x i64> undef, <8 x i32> @@ -108,6 +121,7 @@ define i64 @load_bswap_nop_shift(%v8i8* %p) { ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i64> [[BIN_RDX2]], <8 x i64> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = or <8 x i64> [[BIN_RDX2]], [[RDX_SHUF3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[BIN_RDX4]], i32 0 +; CHECK-NEXT: [[OR01234567:%.*]] = or i64 [[OR0123456]], undef ; CHECK-NEXT: ret i64 [[TMP5]] ; %g0 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 0 @@ -182,6 +196,12 @@ define i64 @load64le(i8* %arg) { ; CHECK-NEXT: [[S5:%.*]] = shl nuw nsw i64 [[Z5]], 40 ; CHECK-NEXT: [[S6:%.*]] = shl nuw nsw i64 [[Z6]], 48 ; CHECK-NEXT: [[S7:%.*]] = shl nuw i64 [[Z7]], 56 +; CHECK-NEXT: [[O1:%.*]] = or i64 undef, [[Z0]] +; CHECK-NEXT: [[O2:%.*]] = or i64 [[O1]], undef +; CHECK-NEXT: [[O3:%.*]] = or i64 [[O2]], undef +; CHECK-NEXT: [[O4:%.*]] = or i64 [[O3]], undef +; CHECK-NEXT: [[O5:%.*]] = or i64 [[O4]], [[S5]] +; CHECK-NEXT: [[O6:%.*]] = or i64 [[O5]], [[S6]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <4 x i64> [[TMP4]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]], <4 x i64> undef, <4 x i32> @@ -191,6 +211,7 @@ define i64 @load64le(i8* %arg) { ; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], [[S6]] ; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP7]], [[S7]] ; CHECK-NEXT: [[OP_EXTRA:%.*]] = or i64 [[TMP8]], [[Z0]] +; CHECK-NEXT: [[O7:%.*]] = or i64 [[O6]], [[S7]] ; CHECK-NEXT: ret i64 [[OP_EXTRA]] ; %g1 = getelementptr inbounds i8, i8* %arg, i64 1 @@ -251,6 +272,12 @@ define i64 @load64le_nop_shift(i8* %arg) { ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 ; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw <8 x i64> [[TMP3]], +; CHECK-NEXT: [[O1:%.*]] = or i64 undef, undef +; CHECK-NEXT: [[O2:%.*]] = or i64 [[O1]], undef +; CHECK-NEXT: [[O3:%.*]] = or i64 [[O2]], undef +; CHECK-NEXT: [[O4:%.*]] = or i64 [[O3]], undef +; CHECK-NEXT: [[O5:%.*]] = or i64 [[O4]], undef +; CHECK-NEXT: [[O6:%.*]] = or i64 [[O5]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <8 x i64> [[TMP4]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i64> [[BIN_RDX]], <8 x i64> undef, <8 x i32> @@ -258,6 +285,7 @@ define i64 @load64le_nop_shift(i8* %arg) { ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i64> [[BIN_RDX2]], <8 x i64> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = or <8 x i64> [[BIN_RDX2]], [[RDX_SHUF3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[BIN_RDX4]], i32 0 +; CHECK-NEXT: [[O7:%.*]] = or i64 [[O6]], undef ; CHECK-NEXT: ret i64 [[TMP5]] ; %g1 = getelementptr inbounds i8, i8* %arg, i64 1 diff --git a/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/test/Transforms/SLPVectorizer/X86/horizontal-list.ll index 1a2c66c6879..6c3994b0a22 100644 --- a/test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ b/test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -100,8 +100,16 @@ define float @bazz() { ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr to <8 x float>*), align 16 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16 ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]] +; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]] +; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] +; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] ; CHECK-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2 ; CHECK-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float +; CHECK-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]] +; CHECK-NEXT: [[ADD19:%.*]] = fadd fast float undef, [[ADD7]] +; CHECK-NEXT: [[ADD19_1:%.*]] = fadd fast float undef, [[ADD19]] +; CHECK-NEXT: [[ADD19_2:%.*]] = fadd fast float undef, [[ADD19_1]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP3]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -111,6 +119,7 @@ define float @bazz() { ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]] ; CHECK-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV6]] +; CHECK-NEXT: [[ADD19_3:%.*]] = fadd fast float undef, [[ADD19_2]] ; CHECK-NEXT: store float [[OP_EXTRA5]], float* @res, align 4 ; CHECK-NEXT: ret float [[OP_EXTRA5]] ; @@ -122,8 +131,16 @@ define float @bazz() { ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr to <8 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]] +; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]] +; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]] +; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] +; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] ; THRESHOLD-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2 ; THRESHOLD-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float +; THRESHOLD-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]] +; THRESHOLD-NEXT: [[ADD19:%.*]] = fadd fast float undef, [[ADD7]] +; THRESHOLD-NEXT: [[ADD19_1:%.*]] = fadd fast float undef, [[ADD19]] +; THRESHOLD-NEXT: [[ADD19_2:%.*]] = fadd fast float undef, [[ADD19_1]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> undef, <8 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP3]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -133,6 +150,7 @@ define float @bazz() { ; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]] ; THRESHOLD-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV6]] +; THRESHOLD-NEXT: [[ADD19_3:%.*]] = fadd fast float undef, [[ADD19_2]] ; THRESHOLD-NEXT: store float [[OP_EXTRA5]], float* @res, align 4 ; THRESHOLD-NEXT: ret float [[OP_EXTRA5]] ; @@ -187,14 +205,17 @@ define float @bazzz() { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef +; CHECK-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]] -; CHECK-NEXT: store float [[TMP5]], float* @res, align 4 -; CHECK-NEXT: ret float [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]] +; CHECK-NEXT: store float [[TMP8]], float* @res, align 4 +; CHECK-NEXT: ret float [[TMP8]] ; ; THRESHOLD-LABEL: @bazzz( ; THRESHOLD-NEXT: entry: @@ -203,14 +224,17 @@ define float @bazzz() { ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]] +; THRESHOLD-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef +; THRESHOLD-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; THRESHOLD-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] -; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 -; THRESHOLD-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]] -; THRESHOLD-NEXT: store float [[TMP5]], float* @res, align 4 -; THRESHOLD-NEXT: ret float [[TMP5]] +; THRESHOLD-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 +; THRESHOLD-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]] +; THRESHOLD-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]] +; THRESHOLD-NEXT: store float [[TMP8]], float* @res, align 4 +; THRESHOLD-NEXT: ret float [[TMP8]] ; entry: %0 = load i32, i32* @n, align 4 @@ -243,13 +267,16 @@ define i32 @foo() { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef +; CHECK-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]] -; CHECK-NEXT: [[CONV4:%.*]] = fptosi float [[TMP5]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]] +; CHECK-NEXT: [[CONV4:%.*]] = fptosi float [[TMP8]] to i32 ; CHECK-NEXT: store i32 [[CONV4]], i32* @n, align 4 ; CHECK-NEXT: ret i32 [[CONV4]] ; @@ -260,13 +287,16 @@ define i32 @foo() { ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]] +; THRESHOLD-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef +; THRESHOLD-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; THRESHOLD-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] -; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 -; THRESHOLD-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]] -; THRESHOLD-NEXT: [[CONV4:%.*]] = fptosi float [[TMP5]] to i32 +; THRESHOLD-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 +; THRESHOLD-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]] +; THRESHOLD-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]] +; THRESHOLD-NEXT: [[CONV4:%.*]] = fptosi float [[TMP8]] to i32 ; THRESHOLD-NEXT: store i32 [[CONV4]], i32* @n, align 4 ; THRESHOLD-NEXT: ret i32 [[CONV4]] ; @@ -300,6 +330,11 @@ define float @bar() { ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[CMP4:%.*]] = fcmp fast ogt float undef, undef +; CHECK-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float undef, float undef +; CHECK-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], undef +; CHECK-NEXT: [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float undef +; CHECK-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float> [[TMP2]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP2]], <4 x float> [[RDX_SHUF]] @@ -307,6 +342,7 @@ define float @bar() { ; CHECK-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF1]] ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT3]], i32 0 +; CHECK-NEXT: [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float undef ; CHECK-NEXT: store float [[TMP3]], float* @res, align 4 ; CHECK-NEXT: ret float [[TMP3]] ; @@ -315,6 +351,11 @@ define float @bar() { ; THRESHOLD-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]] +; THRESHOLD-NEXT: [[CMP4:%.*]] = fcmp fast ogt float undef, undef +; THRESHOLD-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float undef, float undef +; THRESHOLD-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], undef +; THRESHOLD-NEXT: [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float undef +; THRESHOLD-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], undef ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> ; THRESHOLD-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float> [[TMP2]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP2]], <4 x float> [[RDX_SHUF]] @@ -322,6 +363,7 @@ define float @bar() { ; THRESHOLD-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; THRESHOLD-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF1]] ; THRESHOLD-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT3]], i32 0 +; THRESHOLD-NEXT: [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float undef ; THRESHOLD-NEXT: store float [[TMP3]], float* @res, align 4 ; THRESHOLD-NEXT: ret float [[TMP3]] ; @@ -368,6 +410,21 @@ define float @f(float* nocapture readonly %x) { ; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4 +; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float undef, undef +; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] +; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] +; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] +; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] +; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] +; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] +; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] +; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] +; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] +; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] +; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] +; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] +; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] +; CHECK-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] ; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 ; CHECK-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 ; CHECK-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 @@ -402,6 +459,37 @@ define float @f(float* nocapture readonly %x) { ; CHECK-NEXT: [[ARRAYIDX_47:%.*]] = getelementptr inbounds float, float* [[X]], i64 47 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>* ; CHECK-NEXT: [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4 +; CHECK-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] +; CHECK-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] +; CHECK-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] +; CHECK-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] +; CHECK-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] +; CHECK-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] +; CHECK-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] +; CHECK-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] +; CHECK-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] +; CHECK-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] +; CHECK-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] +; CHECK-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] +; CHECK-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] +; CHECK-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] +; CHECK-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]] +; CHECK-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]] +; CHECK-NEXT: [[ADD_32:%.*]] = fadd fast float undef, [[ADD_31]] +; CHECK-NEXT: [[ADD_33:%.*]] = fadd fast float undef, [[ADD_32]] +; CHECK-NEXT: [[ADD_34:%.*]] = fadd fast float undef, [[ADD_33]] +; CHECK-NEXT: [[ADD_35:%.*]] = fadd fast float undef, [[ADD_34]] +; CHECK-NEXT: [[ADD_36:%.*]] = fadd fast float undef, [[ADD_35]] +; CHECK-NEXT: [[ADD_37:%.*]] = fadd fast float undef, [[ADD_36]] +; CHECK-NEXT: [[ADD_38:%.*]] = fadd fast float undef, [[ADD_37]] +; CHECK-NEXT: [[ADD_39:%.*]] = fadd fast float undef, [[ADD_38]] +; CHECK-NEXT: [[ADD_40:%.*]] = fadd fast float undef, [[ADD_39]] +; CHECK-NEXT: [[ADD_41:%.*]] = fadd fast float undef, [[ADD_40]] +; CHECK-NEXT: [[ADD_42:%.*]] = fadd fast float undef, [[ADD_41]] +; CHECK-NEXT: [[ADD_43:%.*]] = fadd fast float undef, [[ADD_42]] +; CHECK-NEXT: [[ADD_44:%.*]] = fadd fast float undef, [[ADD_43]] +; CHECK-NEXT: [[ADD_45:%.*]] = fadd fast float undef, [[ADD_44]] +; CHECK-NEXT: [[ADD_46:%.*]] = fadd fast float undef, [[ADD_45]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP3]], <32 x float> undef, <32 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP3]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> @@ -423,6 +511,7 @@ define float @f(float* nocapture readonly %x) { ; CHECK-NEXT: [[BIN_RDX16:%.*]] = fadd fast <16 x float> [[BIN_RDX14]], [[RDX_SHUF15]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x float> [[BIN_RDX16]], i32 0 ; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[ADD_47:%.*]] = fadd fast float undef, [[ADD_46]] ; CHECK-NEXT: ret float [[OP_RDX]] ; ; THRESHOLD-LABEL: @f( @@ -444,6 +533,21 @@ define float @f(float* nocapture readonly %x) { ; THRESHOLD-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4 +; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float undef, undef +; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] +; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] +; THRESHOLD-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] +; THRESHOLD-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] +; THRESHOLD-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] +; THRESHOLD-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] +; THRESHOLD-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] +; THRESHOLD-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] +; THRESHOLD-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] +; THRESHOLD-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] +; THRESHOLD-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] +; THRESHOLD-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] +; THRESHOLD-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] +; THRESHOLD-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] ; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 ; THRESHOLD-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 ; THRESHOLD-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 @@ -478,6 +582,37 @@ define float @f(float* nocapture readonly %x) { ; THRESHOLD-NEXT: [[ARRAYIDX_47:%.*]] = getelementptr inbounds float, float* [[X]], i64 47 ; THRESHOLD-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>* ; THRESHOLD-NEXT: [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4 +; THRESHOLD-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] +; THRESHOLD-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] +; THRESHOLD-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] +; THRESHOLD-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] +; THRESHOLD-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] +; THRESHOLD-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] +; THRESHOLD-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] +; THRESHOLD-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] +; THRESHOLD-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] +; THRESHOLD-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] +; THRESHOLD-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] +; THRESHOLD-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] +; THRESHOLD-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] +; THRESHOLD-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] +; THRESHOLD-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]] +; THRESHOLD-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]] +; THRESHOLD-NEXT: [[ADD_32:%.*]] = fadd fast float undef, [[ADD_31]] +; THRESHOLD-NEXT: [[ADD_33:%.*]] = fadd fast float undef, [[ADD_32]] +; THRESHOLD-NEXT: [[ADD_34:%.*]] = fadd fast float undef, [[ADD_33]] +; THRESHOLD-NEXT: [[ADD_35:%.*]] = fadd fast float undef, [[ADD_34]] +; THRESHOLD-NEXT: [[ADD_36:%.*]] = fadd fast float undef, [[ADD_35]] +; THRESHOLD-NEXT: [[ADD_37:%.*]] = fadd fast float undef, [[ADD_36]] +; THRESHOLD-NEXT: [[ADD_38:%.*]] = fadd fast float undef, [[ADD_37]] +; THRESHOLD-NEXT: [[ADD_39:%.*]] = fadd fast float undef, [[ADD_38]] +; THRESHOLD-NEXT: [[ADD_40:%.*]] = fadd fast float undef, [[ADD_39]] +; THRESHOLD-NEXT: [[ADD_41:%.*]] = fadd fast float undef, [[ADD_40]] +; THRESHOLD-NEXT: [[ADD_42:%.*]] = fadd fast float undef, [[ADD_41]] +; THRESHOLD-NEXT: [[ADD_43:%.*]] = fadd fast float undef, [[ADD_42]] +; THRESHOLD-NEXT: [[ADD_44:%.*]] = fadd fast float undef, [[ADD_43]] +; THRESHOLD-NEXT: [[ADD_45:%.*]] = fadd fast float undef, [[ADD_44]] +; THRESHOLD-NEXT: [[ADD_46:%.*]] = fadd fast float undef, [[ADD_45]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP3]], <32 x float> undef, <32 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP3]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> @@ -499,6 +634,7 @@ define float @f(float* nocapture readonly %x) { ; THRESHOLD-NEXT: [[BIN_RDX16:%.*]] = fadd fast <16 x float> [[BIN_RDX14]], [[RDX_SHUF15]] ; THRESHOLD-NEXT: [[TMP5:%.*]] = extractelement <16 x float> [[BIN_RDX16]], i32 0 ; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]] +; THRESHOLD-NEXT: [[ADD_47:%.*]] = fadd fast float undef, [[ADD_46]] ; THRESHOLD-NEXT: ret float [[OP_RDX]] ; entry: @@ -685,6 +821,37 @@ define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) { ; CHECK-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]] +; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]] +; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] +; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] +; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] +; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] +; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] +; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] +; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] +; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] +; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] +; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] +; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] +; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] +; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] +; CHECK-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] +; CHECK-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] +; CHECK-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] +; CHECK-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] +; CHECK-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] +; CHECK-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] +; CHECK-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] +; CHECK-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] +; CHECK-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] +; CHECK-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] +; CHECK-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] +; CHECK-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] +; CHECK-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] +; CHECK-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] +; CHECK-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] +; CHECK-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP1]], <32 x float> undef, <32 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> @@ -697,6 +864,7 @@ define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) { ; CHECK-NEXT: [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]] +; CHECK-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]] ; CHECK-NEXT: ret float [[OP_EXTRA]] ; ; THRESHOLD-LABEL: @f1( @@ -736,6 +904,37 @@ define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) { ; THRESHOLD-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4 +; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]] +; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]] +; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] +; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] +; THRESHOLD-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] +; THRESHOLD-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] +; THRESHOLD-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] +; THRESHOLD-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] +; THRESHOLD-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] +; THRESHOLD-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] +; THRESHOLD-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] +; THRESHOLD-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] +; THRESHOLD-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] +; THRESHOLD-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] +; THRESHOLD-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] +; THRESHOLD-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] +; THRESHOLD-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] +; THRESHOLD-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] +; THRESHOLD-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] +; THRESHOLD-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] +; THRESHOLD-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] +; THRESHOLD-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] +; THRESHOLD-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] +; THRESHOLD-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] +; THRESHOLD-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] +; THRESHOLD-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] +; THRESHOLD-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] +; THRESHOLD-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] +; THRESHOLD-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] +; THRESHOLD-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] +; THRESHOLD-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP1]], <32 x float> undef, <32 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP1]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> @@ -748,6 +947,7 @@ define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) { ; THRESHOLD-NEXT: [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]] ; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]] +; THRESHOLD-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]] ; THRESHOLD-NEXT: ret float [[OP_EXTRA]] ; entry: @@ -858,12 +1058,17 @@ define float @loadadd31(float* nocapture readonly %x) { ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4 +; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP1]], [[TMP0]] ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 ; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 ; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4 +; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] +; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] +; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] +; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] ; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 ; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 @@ -874,6 +1079,14 @@ define float @loadadd31(float* nocapture readonly %x) { ; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>* ; CHECK-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4 +; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] +; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] +; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] +; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] +; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] +; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] +; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] +; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] ; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 ; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 ; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 @@ -892,6 +1105,21 @@ define float @loadadd31(float* nocapture readonly %x) { ; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>* ; CHECK-NEXT: [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4 +; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] +; CHECK-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] +; CHECK-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] +; CHECK-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] +; CHECK-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] +; CHECK-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] +; CHECK-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] +; CHECK-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] +; CHECK-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] +; CHECK-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] +; CHECK-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] +; CHECK-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] +; CHECK-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] +; CHECK-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] +; CHECK-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> undef, <16 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP7]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[BIN_RDX]], <16 x float> undef, <16 x i32> @@ -917,6 +1145,7 @@ define float @loadadd31(float* nocapture readonly %x) { ; CHECK-NEXT: [[OP_RDX17:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd fast float [[OP_RDX17]], [[TMP1]] ; CHECK-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]] +; CHECK-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] ; CHECK-NEXT: ret float [[TMP12]] ; ; THRESHOLD-LABEL: @loadadd31( @@ -925,12 +1154,17 @@ define float @loadadd31(float* nocapture readonly %x) { ; THRESHOLD-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4 ; THRESHOLD-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4 +; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP1]], [[TMP0]] ; THRESHOLD-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 ; THRESHOLD-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 ; THRESHOLD-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 ; THRESHOLD-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 ; THRESHOLD-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>* ; THRESHOLD-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4 +; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] +; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] +; THRESHOLD-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] +; THRESHOLD-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] ; THRESHOLD-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; THRESHOLD-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 ; THRESHOLD-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 @@ -941,6 +1175,14 @@ define float @loadadd31(float* nocapture readonly %x) { ; THRESHOLD-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 ; THRESHOLD-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>* ; THRESHOLD-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4 +; THRESHOLD-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] +; THRESHOLD-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] +; THRESHOLD-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] +; THRESHOLD-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] +; THRESHOLD-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] +; THRESHOLD-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] +; THRESHOLD-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] +; THRESHOLD-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] ; THRESHOLD-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 ; THRESHOLD-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 ; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 @@ -959,6 +1201,21 @@ define float @loadadd31(float* nocapture readonly %x) { ; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 ; THRESHOLD-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>* ; THRESHOLD-NEXT: [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4 +; THRESHOLD-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] +; THRESHOLD-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] +; THRESHOLD-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] +; THRESHOLD-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] +; THRESHOLD-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] +; THRESHOLD-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] +; THRESHOLD-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] +; THRESHOLD-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] +; THRESHOLD-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] +; THRESHOLD-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] +; THRESHOLD-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] +; THRESHOLD-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] +; THRESHOLD-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] +; THRESHOLD-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] +; THRESHOLD-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> undef, <16 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP7]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[BIN_RDX]], <16 x float> undef, <16 x i32> @@ -984,6 +1241,7 @@ define float @loadadd31(float* nocapture readonly %x) { ; THRESHOLD-NEXT: [[OP_RDX17:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]] ; THRESHOLD-NEXT: [[TMP11:%.*]] = fadd fast float [[OP_RDX17]], [[TMP1]] ; THRESHOLD-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]] +; THRESHOLD-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] ; THRESHOLD-NEXT: ret float [[TMP12]] ; entry: @@ -1094,6 +1352,14 @@ define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) { ; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 +; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]] +; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]] +; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4]], [[CONV]] +; CHECK-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]] +; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]] +; CHECK-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]] +; CHECK-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]] +; CHECK-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -1103,6 +1369,7 @@ define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) { ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] ; CHECK-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]] +; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]] ; CHECK-NEXT: ret float [[OP_EXTRA5]] ; ; THRESHOLD-LABEL: @extra_args( @@ -1119,6 +1386,14 @@ define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) { ; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 +; THRESHOLD-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]] +; THRESHOLD-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]] +; THRESHOLD-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4]], [[CONV]] +; THRESHOLD-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]] +; THRESHOLD-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]] +; THRESHOLD-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]] +; THRESHOLD-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]] +; THRESHOLD-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -1128,6 +1403,7 @@ define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) { ; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] ; THRESHOLD-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]] +; THRESHOLD-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]] ; THRESHOLD-NEXT: ret float [[OP_EXTRA5]] ; entry: @@ -1176,6 +1452,16 @@ define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a ; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 +; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]] +; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]] +; CHECK-NEXT: [[ADD41:%.*]] = fadd fast float [[ADD4]], 5.000000e+00 +; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD41]], [[CONV]] +; CHECK-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]] +; CHECK-NEXT: [[ADD4_11:%.*]] = fadd fast float [[ADD4_1]], 5.000000e+00 +; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_11]] +; CHECK-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]] +; CHECK-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]] +; CHECK-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -1187,6 +1473,7 @@ define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a ; CHECK-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], 5.000000e+00 ; CHECK-NEXT: [[OP_EXTRA6:%.*]] = fadd fast float [[OP_EXTRA5]], 5.000000e+00 ; CHECK-NEXT: [[OP_EXTRA7:%.*]] = fadd fast float [[OP_EXTRA6]], [[CONV]] +; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]] ; CHECK-NEXT: ret float [[OP_EXTRA7]] ; ; THRESHOLD-LABEL: @extra_args_same_several_times( @@ -1203,6 +1490,16 @@ define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a ; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 +; THRESHOLD-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]] +; THRESHOLD-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]] +; THRESHOLD-NEXT: [[ADD41:%.*]] = fadd fast float [[ADD4]], 5.000000e+00 +; THRESHOLD-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD41]], [[CONV]] +; THRESHOLD-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]] +; THRESHOLD-NEXT: [[ADD4_11:%.*]] = fadd fast float [[ADD4_1]], 5.000000e+00 +; THRESHOLD-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_11]] +; THRESHOLD-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]] +; THRESHOLD-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]] +; THRESHOLD-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -1214,6 +1511,7 @@ define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a ; THRESHOLD-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], 5.000000e+00 ; THRESHOLD-NEXT: [[OP_EXTRA6:%.*]] = fadd fast float [[OP_EXTRA5]], 5.000000e+00 ; THRESHOLD-NEXT: [[OP_EXTRA7:%.*]] = fadd fast float [[OP_EXTRA6]], [[CONV]] +; THRESHOLD-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]] ; THRESHOLD-NEXT: ret float [[OP_EXTRA7]] ; entry: @@ -1266,6 +1564,14 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b ; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 +; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]] +; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]] +; CHECK-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD4]] +; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]] +; CHECK-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]] +; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4_3]], [[CONV]] +; CHECK-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD5]] +; CHECK-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -1275,6 +1581,7 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] ; CHECK-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]] +; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]] ; CHECK-NEXT: ret float [[OP_EXTRA5]] ; ; THRESHOLD-LABEL: @extra_args_no_replace( @@ -1293,6 +1600,14 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b ; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 +; THRESHOLD-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]] +; THRESHOLD-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]] +; THRESHOLD-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD4]] +; THRESHOLD-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]] +; THRESHOLD-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]] +; THRESHOLD-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4_3]], [[CONV]] +; THRESHOLD-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD5]] +; THRESHOLD-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -1302,6 +1617,7 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b ; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] ; THRESHOLD-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]] +; THRESHOLD-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]] ; THRESHOLD-NEXT: ret float [[OP_EXTRA5]] ; entry: @@ -1352,6 +1668,10 @@ define i32 @wobble(i32 %arg, i32 %bar) { ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32> +; CHECK-NEXT: [[R1:%.*]] = add nuw i32 [[ARG]], undef +; CHECK-NEXT: [[R2:%.*]] = add nsw i32 [[R1]], undef +; CHECK-NEXT: [[R3:%.*]] = add nsw i32 [[R2]], undef +; CHECK-NEXT: [[R4:%.*]] = add nsw i32 [[R3]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> @@ -1359,6 +1679,7 @@ define i32 @wobble(i32 %arg, i32 %bar) { ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]] ; CHECK-NEXT: [[OP_EXTRA3:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]] +; CHECK-NEXT: [[R5:%.*]] = add nsw i32 [[R4]], [[TMP9]] ; CHECK-NEXT: ret i32 [[OP_EXTRA3]] ; ; THRESHOLD-LABEL: @wobble( @@ -1375,6 +1696,10 @@ define i32 @wobble(i32 %arg, i32 %bar) { ; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 ; THRESHOLD-NEXT: [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer ; THRESHOLD-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32> +; THRESHOLD-NEXT: [[R1:%.*]] = add nuw i32 [[ARG]], undef +; THRESHOLD-NEXT: [[R2:%.*]] = add nsw i32 [[R1]], undef +; THRESHOLD-NEXT: [[R3:%.*]] = add nsw i32 [[R2]], undef +; THRESHOLD-NEXT: [[R4:%.*]] = add nsw i32 [[R3]], undef ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> undef, <4 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> @@ -1382,6 +1707,7 @@ define i32 @wobble(i32 %arg, i32 %bar) { ; THRESHOLD-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]] ; THRESHOLD-NEXT: [[OP_EXTRA3:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]] +; THRESHOLD-NEXT: [[R5:%.*]] = add nsw i32 [[R4]], [[TMP9]] ; THRESHOLD-NEXT: ret i32 [[OP_EXTRA3]] ; bb: diff --git a/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll index 8b0ae669ae7..8432b910d91 100644 --- a/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll +++ b/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll @@ -12,6 +12,19 @@ define i32 @maxi8(i32) { ; CHECK-LABEL: @maxi8( ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16 +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef +; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef +; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef +; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef +; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef +; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP2]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP2]], <8 x i32> [[RDX_SHUF]] @@ -21,8 +34,9 @@ define i32 @maxi8(i32) { ; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]] -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 -; CHECK-NEXT: ret i32 [[TMP3]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef +; CHECK-NEXT: ret i32 [[TMP16]] ; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 @@ -52,6 +66,35 @@ define i32 @maxi8(i32) { define i32 @maxi16(i32) { ; CHECK-LABEL: @maxi16( ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr to <16 x i32>*), align 16 +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef +; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef +; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef +; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef +; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef +; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef +; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef +; CHECK-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef +; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 undef +; CHECK-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef +; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 undef +; CHECK-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef +; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef +; CHECK-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef +; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef +; CHECK-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef +; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef +; CHECK-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef +; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 undef +; CHECK-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef +; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 undef +; CHECK-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> undef, <16 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <16 x i32> [[TMP2]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x i32> [[TMP2]], <16 x i32> [[RDX_SHUF]] @@ -64,8 +107,9 @@ define i32 @maxi16(i32) { ; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> undef, <16 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> [[RDX_SHUF7]] -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0 -; CHECK-NEXT: ret i32 [[TMP3]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0 +; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 undef +; CHECK-NEXT: ret i32 [[TMP32]] ; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 @@ -119,6 +163,67 @@ define i32 @maxi16(i32) { define i32 @maxi32(i32) { ; CHECK-LABEL: @maxi32( ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr to <32 x i32>*), align 16 +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef +; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef +; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef +; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef +; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef +; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef +; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef +; CHECK-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef +; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 undef +; CHECK-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef +; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 undef +; CHECK-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef +; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef +; CHECK-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef +; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef +; CHECK-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef +; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef +; CHECK-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef +; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 undef +; CHECK-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef +; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 undef +; CHECK-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef +; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 undef +; CHECK-NEXT: [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], undef +; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32 undef +; CHECK-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP34]], undef +; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP34]], i32 undef +; CHECK-NEXT: [[TMP37:%.*]] = icmp sgt i32 [[TMP36]], undef +; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[TMP36]], i32 undef +; CHECK-NEXT: [[TMP39:%.*]] = icmp sgt i32 [[TMP38]], undef +; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 [[TMP38]], i32 undef +; CHECK-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP40]], undef +; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP40]], i32 undef +; CHECK-NEXT: [[TMP43:%.*]] = icmp sgt i32 [[TMP42]], undef +; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP42]], i32 undef +; CHECK-NEXT: [[TMP45:%.*]] = icmp sgt i32 [[TMP44]], undef +; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], i32 [[TMP44]], i32 undef +; CHECK-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP46]], undef +; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP46]], i32 undef +; CHECK-NEXT: [[TMP49:%.*]] = icmp sgt i32 [[TMP48]], undef +; CHECK-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], i32 [[TMP48]], i32 undef +; CHECK-NEXT: [[TMP51:%.*]] = icmp sgt i32 [[TMP50]], undef +; CHECK-NEXT: [[TMP52:%.*]] = select i1 [[TMP51]], i32 [[TMP50]], i32 undef +; CHECK-NEXT: [[TMP53:%.*]] = icmp sgt i32 [[TMP52]], undef +; CHECK-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP52]], i32 undef +; CHECK-NEXT: [[TMP55:%.*]] = icmp sgt i32 [[TMP54]], undef +; CHECK-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], i32 [[TMP54]], i32 undef +; CHECK-NEXT: [[TMP57:%.*]] = icmp sgt i32 [[TMP56]], undef +; CHECK-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32 undef +; CHECK-NEXT: [[TMP59:%.*]] = icmp sgt i32 [[TMP58]], undef +; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP58]], i32 undef +; CHECK-NEXT: [[TMP61:%.*]] = icmp sgt i32 [[TMP60]], undef +; CHECK-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], i32 [[TMP60]], i32 undef +; CHECK-NEXT: [[TMP63:%.*]] = icmp sgt i32 [[TMP62]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP2]], <32 x i32> undef, <32 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <32 x i32> [[TMP2]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x i32> [[TMP2]], <32 x i32> [[RDX_SHUF]] @@ -134,8 +239,9 @@ define i32 @maxi32(i32) { ; CHECK-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> undef, <32 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP11:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> [[RDX_SHUF10]] -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 -; CHECK-NEXT: ret i32 [[TMP3]] +; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 +; CHECK-NEXT: [[TMP65:%.*]] = select i1 [[TMP63]], i32 [[TMP62]], i32 undef +; CHECK-NEXT: ret i32 [[TMP64]] ; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 @@ -237,6 +343,19 @@ define i32 @maxi32(i32) { define float @maxf8(float) { ; CHECK-LABEL: @maxf8( ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr1 to <8 x float>*), align 16 +; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef +; CHECK-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef +; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef +; CHECK-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef +; CHECK-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef +; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef +; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> undef, <8 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <8 x float> [[TMP2]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]] @@ -246,8 +365,9 @@ define float @maxf8(float) { ; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> undef, <8 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> [[RDX_SHUF4]] -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0 -; CHECK-NEXT: ret float [[TMP3]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef +; CHECK-NEXT: ret float [[TMP16]] ; %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 @@ -277,6 +397,35 @@ define float @maxf8(float) { define float @maxf16(float) { ; CHECK-LABEL: @maxf16( ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr1 to <16 x float>*), align 16 +; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef +; CHECK-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef +; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef +; CHECK-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef +; CHECK-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef +; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef +; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef +; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef +; CHECK-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef +; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]], float undef +; CHECK-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef +; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]], float undef +; CHECK-NEXT: [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef +; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]], float undef +; CHECK-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef +; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]], float undef +; CHECK-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef +; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]], float undef +; CHECK-NEXT: [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef +; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]], float undef +; CHECK-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef +; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]], float undef +; CHECK-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> undef, <16 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <16 x float> [[TMP2]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x float> [[TMP2]], <16 x float> [[RDX_SHUF]] @@ -289,8 +438,9 @@ define float @maxf16(float) { ; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> undef, <16 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> [[RDX_SHUF7]] -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0 -; CHECK-NEXT: ret float [[TMP3]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0 +; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], float [[TMP30]], float undef +; CHECK-NEXT: ret float [[TMP32]] ; %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 @@ -344,6 +494,67 @@ define float @maxf16(float) { define float @maxf32(float) { ; CHECK-LABEL: @maxf32( ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x float>, <32 x float>* bitcast ([32 x float]* @arr1 to <32 x float>*), align 16 +; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef +; CHECK-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef +; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef +; CHECK-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef +; CHECK-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef +; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef +; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef +; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef +; CHECK-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef +; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]], float undef +; CHECK-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef +; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]], float undef +; CHECK-NEXT: [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef +; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]], float undef +; CHECK-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef +; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]], float undef +; CHECK-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef +; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]], float undef +; CHECK-NEXT: [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef +; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]], float undef +; CHECK-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef +; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]], float undef +; CHECK-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef +; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP30]], float undef +; CHECK-NEXT: [[TMP33:%.*]] = fcmp fast ogt float [[TMP32]], undef +; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], float [[TMP32]], float undef +; CHECK-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP34]], undef +; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP34]], float undef +; CHECK-NEXT: [[TMP37:%.*]] = fcmp fast ogt float [[TMP36]], undef +; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP36]], float undef +; CHECK-NEXT: [[TMP39:%.*]] = fcmp fast ogt float [[TMP38]], undef +; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], float [[TMP38]], float undef +; CHECK-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP40]], undef +; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP40]], float undef +; CHECK-NEXT: [[TMP43:%.*]] = fcmp fast ogt float [[TMP42]], undef +; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP42]], float undef +; CHECK-NEXT: [[TMP45:%.*]] = fcmp fast ogt float [[TMP44]], undef +; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], float [[TMP44]], float undef +; CHECK-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP46]], undef +; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP46]], float undef +; CHECK-NEXT: [[TMP49:%.*]] = fcmp fast ogt float [[TMP48]], undef +; CHECK-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], float [[TMP48]], float undef +; CHECK-NEXT: [[TMP51:%.*]] = fcmp fast ogt float [[TMP50]], undef +; CHECK-NEXT: [[TMP52:%.*]] = select i1 [[TMP51]], float [[TMP50]], float undef +; CHECK-NEXT: [[TMP53:%.*]] = fcmp fast ogt float [[TMP52]], undef +; CHECK-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], float [[TMP52]], float undef +; CHECK-NEXT: [[TMP55:%.*]] = fcmp fast ogt float [[TMP54]], undef +; CHECK-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], float [[TMP54]], float undef +; CHECK-NEXT: [[TMP57:%.*]] = fcmp fast ogt float [[TMP56]], undef +; CHECK-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], float [[TMP56]], float undef +; CHECK-NEXT: [[TMP59:%.*]] = fcmp fast ogt float [[TMP58]], undef +; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], float [[TMP58]], float undef +; CHECK-NEXT: [[TMP61:%.*]] = fcmp fast ogt float [[TMP60]], undef +; CHECK-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], float [[TMP60]], float undef +; CHECK-NEXT: [[TMP63:%.*]] = fcmp fast ogt float [[TMP62]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP2]], <32 x float> undef, <32 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <32 x float> [[TMP2]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x float> [[TMP2]], <32 x float> [[RDX_SHUF]] @@ -359,8 +570,9 @@ define float @maxf32(float) { ; CHECK-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float> undef, <32 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP11:%.*]] = fcmp fast ogt <32 x float> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float> [[RDX_SHUF10]] -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <32 x float> [[RDX_MINMAX_SELECT12]], i32 0 -; CHECK-NEXT: ret float [[TMP3]] +; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x float> [[RDX_MINMAX_SELECT12]], i32 0 +; CHECK-NEXT: [[TMP65:%.*]] = select i1 [[TMP63]], float [[TMP62]], float undef +; CHECK-NEXT: ret float [[TMP64]] ; %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 @@ -466,24 +678,34 @@ define i32 @maxi8_mutiple_uses(i32) { ; SSE-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]] ; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] ; SSE-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 -; SSE-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef +; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef +; SSE-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef +; SSE-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; SSE-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef +; SSE-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef +; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef +; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef +; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; SSE-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] ; SSE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> ; SSE-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]] ; SSE-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]] ; SSE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> ; SSE-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; SSE-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; SSE-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; SSE-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]] -; SSE-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP7]] -; SSE-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]] -; SSE-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 [[TMP5]] -; SSE-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP12]] -; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA]], i32 [[TMP12]] -; SSE-NEXT: [[TMP15:%.*]] = select i1 [[TMP4]], i32 3, i32 4 -; SSE-NEXT: store i32 [[TMP15]], i32* @var, align 8 -; SSE-NEXT: ret i32 [[TMP14]] +; SSE-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; SSE-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], [[TMP15]] +; SSE-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP17]], i32 [[TMP15]] +; SSE-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], [[TMP5]] +; SSE-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP20]], i32 [[TMP19]], i32 [[TMP5]] +; SSE-NEXT: [[TMP21:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] +; SSE-NEXT: [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; SSE-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP22]] +; SSE-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[OP_EXTRA]], i32 [[TMP22]] +; SSE-NEXT: [[TMP25:%.*]] = select i1 [[TMP4]], i32 3, i32 4 +; SSE-NEXT: store i32 [[TMP25]], i32* @var, align 8 +; SSE-NEXT: ret i32 [[TMP24]] ; ; AVX-LABEL: @maxi8_mutiple_uses( ; AVX-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 @@ -491,24 +713,34 @@ define i32 @maxi8_mutiple_uses(i32) { ; AVX-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]] ; AVX-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] ; AVX-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 -; AVX-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; AVX-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef +; AVX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef +; AVX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef +; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef +; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef +; AVX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef +; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef +; AVX-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; AVX-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] ; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]] ; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]] ; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; AVX-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; AVX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]] -; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP7]] -; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]] -; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 [[TMP5]] -; AVX-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; AVX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP12]] -; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA]], i32 [[TMP12]] -; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP4]], i32 3, i32 4 -; AVX-NEXT: store i32 [[TMP15]], i32* @var, align 8 -; AVX-NEXT: ret i32 [[TMP14]] +; AVX-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; AVX-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], [[TMP15]] +; AVX-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP17]], i32 [[TMP15]] +; AVX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], [[TMP5]] +; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP20]], i32 [[TMP19]], i32 [[TMP5]] +; AVX-NEXT: [[TMP21:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] +; AVX-NEXT: [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; AVX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP22]] +; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[OP_EXTRA]], i32 [[TMP22]] +; AVX-NEXT: [[TMP25:%.*]] = select i1 [[TMP4]], i32 3, i32 4 +; AVX-NEXT: store i32 [[TMP25]], i32* @var, align 8 +; AVX-NEXT: ret i32 [[TMP24]] ; ; AVX2-LABEL: @maxi8_mutiple_uses( ; AVX2-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 @@ -516,24 +748,34 @@ define i32 @maxi8_mutiple_uses(i32) { ; AVX2-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]] ; AVX2-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] ; AVX2-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 -; AVX2-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; AVX2-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef +; AVX2-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef +; AVX2-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef +; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef +; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef +; AVX2-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef +; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef +; AVX2-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; AVX2-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] ; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; AVX2-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; AVX2-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]] -; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP7]] -; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]] -; AVX2-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 [[TMP5]] -; AVX2-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; AVX2-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP12]] -; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA]], i32 [[TMP12]] -; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP4]], i32 3, i32 4 -; AVX2-NEXT: store i32 [[TMP15]], i32* @var, align 8 -; AVX2-NEXT: ret i32 [[TMP14]] +; AVX2-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; AVX2-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], [[TMP15]] +; AVX2-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP17]], i32 [[TMP15]] +; AVX2-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], [[TMP5]] +; AVX2-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP20]], i32 [[TMP19]], i32 [[TMP5]] +; AVX2-NEXT: [[TMP21:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] +; AVX2-NEXT: [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP22]] +; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[OP_EXTRA]], i32 [[TMP22]] +; AVX2-NEXT: [[TMP25:%.*]] = select i1 [[TMP4]], i32 3, i32 4 +; AVX2-NEXT: store i32 [[TMP25]], i32* @var, align 8 +; AVX2-NEXT: ret i32 [[TMP24]] ; ; SKX-LABEL: @maxi8_mutiple_uses( ; SKX-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([32 x i32]* @arr to <2 x i32>*), align 16 @@ -555,16 +797,26 @@ define i32 @maxi8_mutiple_uses(i32) { ; SKX-NEXT: [[TMP12:%.*]] = icmp sgt <2 x i32> [[TMP9]], [[TMP11]] ; SKX-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP12]], <2 x i32> [[TMP9]], <2 x i32> [[TMP11]] ; SKX-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[TMP13]], i32 1 -; SKX-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0 -; SKX-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP15]], [[TMP14]] -; SKX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP16]], i32 [[TMP15]], i32 [[TMP14]] -; SKX-NEXT: [[TMP17:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; SKX-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP17]] -; SKX-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[OP_EXTRA]], i32 [[TMP17]] -; SKX-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1 -; SKX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 3, i32 4 -; SKX-NEXT: store i32 [[TMP21]], i32* @var, align 8 -; SKX-NEXT: ret i32 [[TMP19]] +; SKX-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef +; SKX-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef +; SKX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef +; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 undef +; SKX-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef +; SKX-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 undef +; SKX-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef +; SKX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef +; SKX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP6]] +; SKX-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0 +; SKX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP14]] +; SKX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP14]] +; SKX-NEXT: [[TMP26:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 [[TMP6]] +; SKX-NEXT: [[TMP27:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; SKX-NEXT: [[TMP28:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP27]] +; SKX-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[OP_EXTRA]], i32 [[TMP27]] +; SKX-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1 +; SKX-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i32 3, i32 4 +; SKX-NEXT: store i32 [[TMP31]], i32* @var, align 8 +; SKX-NEXT: ret i32 [[TMP29]] ; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 @@ -602,21 +854,33 @@ define i32 @maxi8_wrong_parent(i32) { ; SSE: pp: ; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] ; SSE-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 -; SSE-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; SSE-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef +; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef +; SSE-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef +; SSE-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; SSE-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef +; SSE-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef +; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef +; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef +; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; SSE-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] +; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] +; SSE-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; SSE-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] ; SSE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> ; SSE-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]] ; SSE-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]] ; SSE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> ; SSE-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; SSE-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; SSE-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; SSE-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]] -; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]] -; SSE-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]] -; SSE-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]] -; SSE-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]] -; SSE-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 [[TMP5]] +; SSE-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; SSE-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], [[TMP15]] +; SSE-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 [[TMP15]] +; SSE-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP18]] +; SSE-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 [[TMP18]] +; SSE-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP5]] +; SSE-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP5]] +; SSE-NEXT: [[TMP26:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]] ; SSE-NEXT: ret i32 [[OP_EXTRA]] ; ; AVX-LABEL: @maxi8_wrong_parent( @@ -627,21 +891,33 @@ define i32 @maxi8_wrong_parent(i32) { ; AVX: pp: ; AVX-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] ; AVX-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 -; AVX-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; AVX-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; AVX-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef +; AVX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef +; AVX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef +; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef +; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef +; AVX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef +; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef +; AVX-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; AVX-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] +; AVX-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] +; AVX-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; AVX-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] ; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]] ; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]] ; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; AVX-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; AVX-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]] -; AVX-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]] -; AVX-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]] -; AVX-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]] -; AVX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]] -; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 [[TMP5]] +; AVX-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; AVX-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], [[TMP15]] +; AVX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 [[TMP15]] +; AVX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP18]] +; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 [[TMP18]] +; AVX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP5]] +; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP5]] +; AVX-NEXT: [[TMP26:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]] ; AVX-NEXT: ret i32 [[OP_EXTRA]] ; ; AVX2-LABEL: @maxi8_wrong_parent( @@ -652,21 +928,33 @@ define i32 @maxi8_wrong_parent(i32) { ; AVX2: pp: ; AVX2-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] ; AVX2-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 -; AVX2-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; AVX2-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; AVX2-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef +; AVX2-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef +; AVX2-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef +; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef +; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef +; AVX2-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef +; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef +; AVX2-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; AVX2-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] +; AVX2-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] +; AVX2-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; AVX2-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] ; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; AVX2-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; AVX2-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]] -; AVX2-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]] -; AVX2-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]] -; AVX2-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]] -; AVX2-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]] -; AVX2-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 [[TMP5]] +; AVX2-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; AVX2-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], [[TMP15]] +; AVX2-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 [[TMP15]] +; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP18]] +; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 [[TMP18]] +; AVX2-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP5]] +; AVX2-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP5]] +; AVX2-NEXT: [[TMP26:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]] ; AVX2-NEXT: ret i32 [[OP_EXTRA]] ; ; SKX-LABEL: @maxi8_wrong_parent( @@ -697,9 +985,21 @@ define i32 @maxi8_wrong_parent(i32) { ; SKX-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> [[TMP17]], i32 [[TMP4]], i32 1 ; SKX-NEXT: [[TMP19:%.*]] = select <2 x i1> [[TMP14]], <2 x i32> [[TMP16]], <2 x i32> [[TMP18]] ; SKX-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1 -; SKX-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0 -; SKX-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP21]], [[TMP20]] -; SKX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP22]], i32 [[TMP21]], i32 [[TMP20]] +; SKX-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef +; SKX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef +; SKX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef +; SKX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef +; SKX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef +; SKX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef +; SKX-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef +; SKX-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 undef +; SKX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], [[TMP7]] +; SKX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 [[TMP7]] +; SKX-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], [[TMP8]] +; SKX-NEXT: [[TMP32:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0 +; SKX-NEXT: [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], [[TMP20]] +; SKX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32 [[TMP20]] +; SKX-NEXT: [[TMP34:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 [[TMP8]] ; SKX-NEXT: ret i32 [[OP_EXTRA]] ; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 diff --git a/test/Transforms/SLPVectorizer/X86/horizontal.ll b/test/Transforms/SLPVectorizer/X86/horizontal.ll index ed0e5784e30..311d8a476c7 100644 --- a/test/Transforms/SLPVectorizer/X86/horizontal.ll +++ b/test/Transforms/SLPVectorizer/X86/horizontal.ll @@ -37,11 +37,14 @@ define i32 @add_red(float* %A, i32 %n) { ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], +; CHECK-NEXT: [[ADD6:%.*]] = fadd fast float undef, undef +; CHECK-NEXT: [[ADD11:%.*]] = fadd fast float [[ADD6]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 +; CHECK-NEXT: [[ADD16:%.*]] = fadd fast float [[ADD11]], undef ; CHECK-NEXT: [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]] ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_033]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]] @@ -74,11 +77,14 @@ define i32 @add_red(float* %A, i32 %n) { ; STORE-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>* ; STORE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; STORE-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], +; STORE-NEXT: [[ADD6:%.*]] = fadd fast float undef, undef +; STORE-NEXT: [[ADD11:%.*]] = fadd fast float [[ADD6]], undef ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]] ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; STORE-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] ; STORE-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 +; STORE-NEXT: [[ADD16:%.*]] = fadd fast float [[ADD11]], undef ; STORE-NEXT: [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]] ; STORE-NEXT: [[INC]] = add nsw i64 [[I_033]], 1 ; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]] @@ -172,11 +178,14 @@ define i32 @mul_red(float* noalias %A, float* noalias %B, i32 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]] +; CHECK-NEXT: [[ADD8:%.*]] = fadd fast float undef, undef +; CHECK-NEXT: [[ADD14:%.*]] = fadd fast float [[ADD8]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 +; CHECK-NEXT: [[ADD20:%.*]] = fadd fast float [[ADD14]], undef ; CHECK-NEXT: [[MUL21]] = fmul float [[SUM_039]], [[TMP6]] ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_040]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]] @@ -214,11 +223,14 @@ define i32 @mul_red(float* noalias %A, float* noalias %B, i32 %n) { ; STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* ; STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 ; STORE-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]] +; STORE-NEXT: [[ADD8:%.*]] = fadd fast float undef, undef +; STORE-NEXT: [[ADD14:%.*]] = fadd fast float [[ADD8]], undef ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <4 x i32> ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]], [[RDX_SHUF]] ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; STORE-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] ; STORE-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 +; STORE-NEXT: [[ADD20:%.*]] = fadd fast float [[ADD14]], undef ; STORE-NEXT: [[MUL21]] = fmul float [[SUM_039]], [[TMP6]] ; STORE-NEXT: [[INC]] = add nsw i64 [[I_040]], 1 ; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]] @@ -338,6 +350,13 @@ define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) { ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x float>* ; CHECK-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]] +; CHECK-NEXT: [[ADD8:%.*]] = fadd fast float undef, undef +; CHECK-NEXT: [[ADD14:%.*]] = fadd fast float [[ADD8]], undef +; CHECK-NEXT: [[ADD20:%.*]] = fadd fast float [[ADD14]], undef +; CHECK-NEXT: [[ADD26:%.*]] = fadd fast float [[ADD20]], undef +; CHECK-NEXT: [[ADD32:%.*]] = fadd fast float [[ADD26]], undef +; CHECK-NEXT: [[ADD38:%.*]] = fadd fast float [[ADD32]], undef +; CHECK-NEXT: [[ADD44:%.*]] = fadd fast float [[ADD38]], undef ; CHECK-NEXT: [[ADD47:%.*]] = add nsw i64 [[MUL]], 8 ; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD47]] ; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4 @@ -350,6 +369,7 @@ define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) { ; CHECK-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]] ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]] +; CHECK-NEXT: [[ADD50:%.*]] = fadd fast float [[ADD44]], [[MUL49]] ; CHECK-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]] ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_083]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]] @@ -401,6 +421,13 @@ define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) { ; STORE-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x float>* ; STORE-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4 ; STORE-NEXT: [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]] +; STORE-NEXT: [[ADD8:%.*]] = fadd fast float undef, undef +; STORE-NEXT: [[ADD14:%.*]] = fadd fast float [[ADD8]], undef +; STORE-NEXT: [[ADD20:%.*]] = fadd fast float [[ADD14]], undef +; STORE-NEXT: [[ADD26:%.*]] = fadd fast float [[ADD20]], undef +; STORE-NEXT: [[ADD32:%.*]] = fadd fast float [[ADD26]], undef +; STORE-NEXT: [[ADD38:%.*]] = fadd fast float [[ADD32]], undef +; STORE-NEXT: [[ADD44:%.*]] = fadd fast float [[ADD38]], undef ; STORE-NEXT: [[ADD47:%.*]] = add nsw i64 [[MUL]], 8 ; STORE-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD47]] ; STORE-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4 @@ -413,6 +440,7 @@ define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) { ; STORE-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]] ; STORE-NEXT: [[TMP8:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; STORE-NEXT: [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]] +; STORE-NEXT: [[ADD50:%.*]] = fadd fast float [[ADD44]], [[MUL49]] ; STORE-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]] ; STORE-NEXT: [[INC]] = add nsw i64 [[I_083]], 1 ; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]] @@ -548,12 +576,16 @@ define i32 @chain_red(float* noalias %A, float* noalias %B, i32 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]] +; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[SUM_042]], undef +; CHECK-NEXT: [[ADD9:%.*]] = fadd fast float [[ADD]], undef +; CHECK-NEXT: [[ADD15:%.*]] = fadd fast float [[ADD9]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 ; CHECK-NEXT: [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]] +; CHECK-NEXT: [[ADD21:%.*]] = fadd fast float [[ADD15]], undef ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_043]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]] @@ -590,12 +622,16 @@ define i32 @chain_red(float* noalias %A, float* noalias %B, i32 %n) { ; STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* ; STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 ; STORE-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]] +; STORE-NEXT: [[ADD:%.*]] = fadd fast float [[SUM_042]], undef +; STORE-NEXT: [[ADD9:%.*]] = fadd fast float [[ADD]], undef +; STORE-NEXT: [[ADD15:%.*]] = fadd fast float [[ADD9]], undef ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <4 x i32> ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]], [[RDX_SHUF]] ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; STORE-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] ; STORE-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 ; STORE-NEXT: [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]] +; STORE-NEXT: [[ADD21:%.*]] = fadd fast float [[ADD15]], undef ; STORE-NEXT: [[INC]] = add nsw i64 [[I_043]], 1 ; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]] ; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]] @@ -1051,11 +1087,14 @@ define i32 @store_red(float* noalias %A, float* noalias %B, float* noalias %C, i ; STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* ; STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 ; STORE-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP4]] +; STORE-NEXT: [[ADD8:%.*]] = fadd fast float undef, undef +; STORE-NEXT: [[ADD14:%.*]] = fadd fast float [[ADD8]], undef ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <4 x i32> ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]], [[RDX_SHUF]] ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; STORE-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] ; STORE-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 +; STORE-NEXT: [[ADD20:%.*]] = fadd fast float [[ADD14]], undef ; STORE-NEXT: store float [[TMP6]], float* [[C_ADDR_038]], align 4 ; STORE-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float* [[C_ADDR_038]], i64 1 ; STORE-NEXT: [[INC]] = add nsw i64 [[I_039]], 1 @@ -1130,11 +1169,14 @@ define void @float_red_example4(float* %res) { ; STORE-LABEL: @float_red_example4( ; STORE-NEXT: entry: ; STORE-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([32 x float]* @arr_float to <4 x float>*), align 16 +; STORE-NEXT: [[ADD:%.*]] = fadd fast float undef, undef +; STORE-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]] ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> undef, <4 x i32> ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP0]], [[RDX_SHUF]] ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; STORE-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] ; STORE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 +; STORE-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] ; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16 ; STORE-NEXT: ret void ; @@ -1174,6 +1216,12 @@ define void @float_red_example8(float* %res) { ; STORE-LABEL: @float_red_example8( ; STORE-NEXT: entry: ; STORE-NEXT: [[TMP0:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr_float to <8 x float>*), align 16 +; STORE-NEXT: [[ADD:%.*]] = fadd fast float undef, undef +; STORE-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]] +; STORE-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] +; STORE-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] +; STORE-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] +; STORE-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> undef, <8 x i32> ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP0]], [[RDX_SHUF]] ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -1181,6 +1229,7 @@ define void @float_red_example8(float* %res) { ; STORE-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> ; STORE-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]] ; STORE-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 +; STORE-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] ; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16 ; STORE-NEXT: ret void ; @@ -1244,6 +1293,20 @@ define void @float_red_example16(float* %res) { ; STORE-LABEL: @float_red_example16( ; STORE-NEXT: entry: ; STORE-NEXT: [[TMP0:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr_float to <16 x float>*), align 16 +; STORE-NEXT: [[ADD:%.*]] = fadd fast float undef, undef +; STORE-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]] +; STORE-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] +; STORE-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] +; STORE-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] +; STORE-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] +; STORE-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] +; STORE-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] +; STORE-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] +; STORE-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] +; STORE-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] +; STORE-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] +; STORE-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] +; STORE-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> undef, <16 x i32> ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP0]], [[RDX_SHUF]] ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[BIN_RDX]], <16 x float> undef, <16 x i32> @@ -1253,6 +1316,7 @@ define void @float_red_example16(float* %res) { ; STORE-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <16 x float> [[BIN_RDX4]], <16 x float> undef, <16 x i32> ; STORE-NEXT: [[BIN_RDX6:%.*]] = fadd fast <16 x float> [[BIN_RDX4]], [[RDX_SHUF5]] ; STORE-NEXT: [[TMP1:%.*]] = extractelement <16 x float> [[BIN_RDX6]], i32 0 +; STORE-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] ; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16 ; STORE-NEXT: ret void ; @@ -1308,11 +1372,14 @@ define void @i32_red_example4(i32* %res) { ; STORE-LABEL: @i32_red_example4( ; STORE-NEXT: entry: ; STORE-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr_i32 to <4 x i32>*), align 16 +; STORE-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef +; STORE-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]] ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> undef, <4 x i32> ; STORE-NEXT: [[BIN_RDX:%.*]] = add nsw <4 x i32> [[TMP0]], [[RDX_SHUF]] ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> ; STORE-NEXT: [[BIN_RDX2:%.*]] = add nsw <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]] ; STORE-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0 +; STORE-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]] ; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16 ; STORE-NEXT: ret void ; @@ -1352,6 +1419,12 @@ define void @i32_red_example8(i32* %res) { ; STORE-LABEL: @i32_red_example8( ; STORE-NEXT: entry: ; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 +; STORE-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef +; STORE-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]] +; STORE-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]] +; STORE-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]] +; STORE-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]] +; STORE-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]] ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32> ; STORE-NEXT: [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]] ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> @@ -1359,6 +1432,7 @@ define void @i32_red_example8(i32* %res) { ; STORE-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> ; STORE-NEXT: [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] ; STORE-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 +; STORE-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]] ; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16 ; STORE-NEXT: ret void ; @@ -1422,6 +1496,20 @@ define void @i32_red_example16(i32* %res) { ; STORE-LABEL: @i32_red_example16( ; STORE-NEXT: entry: ; STORE-NEXT: [[TMP0:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr_i32 to <16 x i32>*), align 16 +; STORE-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef +; STORE-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]] +; STORE-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]] +; STORE-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]] +; STORE-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]] +; STORE-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]] +; STORE-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]] +; STORE-NEXT: [[ADD_7:%.*]] = add nsw i32 undef, [[ADD_6]] +; STORE-NEXT: [[ADD_8:%.*]] = add nsw i32 undef, [[ADD_7]] +; STORE-NEXT: [[ADD_9:%.*]] = add nsw i32 undef, [[ADD_8]] +; STORE-NEXT: [[ADD_10:%.*]] = add nsw i32 undef, [[ADD_9]] +; STORE-NEXT: [[ADD_11:%.*]] = add nsw i32 undef, [[ADD_10]] +; STORE-NEXT: [[ADD_12:%.*]] = add nsw i32 undef, [[ADD_11]] +; STORE-NEXT: [[ADD_13:%.*]] = add nsw i32 undef, [[ADD_12]] ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> undef, <16 x i32> ; STORE-NEXT: [[BIN_RDX:%.*]] = add nsw <16 x i32> [[TMP0]], [[RDX_SHUF]] ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[BIN_RDX]], <16 x i32> undef, <16 x i32> @@ -1431,6 +1519,7 @@ define void @i32_red_example16(i32* %res) { ; STORE-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <16 x i32> [[BIN_RDX4]], <16 x i32> undef, <16 x i32> ; STORE-NEXT: [[BIN_RDX6:%.*]] = add nsw <16 x i32> [[BIN_RDX4]], [[RDX_SHUF5]] ; STORE-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[BIN_RDX6]], i32 0 +; STORE-NEXT: [[ADD_14:%.*]] = add nsw i32 undef, [[ADD_13]] ; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16 ; STORE-NEXT: ret void ; @@ -1542,6 +1631,36 @@ define void @i32_red_example32(i32* %res) { ; STORE-LABEL: @i32_red_example32( ; STORE-NEXT: entry: ; STORE-NEXT: [[TMP0:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr_i32 to <32 x i32>*), align 16 +; STORE-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef +; STORE-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]] +; STORE-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]] +; STORE-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]] +; STORE-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]] +; STORE-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]] +; STORE-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]] +; STORE-NEXT: [[ADD_7:%.*]] = add nsw i32 undef, [[ADD_6]] +; STORE-NEXT: [[ADD_8:%.*]] = add nsw i32 undef, [[ADD_7]] +; STORE-NEXT: [[ADD_9:%.*]] = add nsw i32 undef, [[ADD_8]] +; STORE-NEXT: [[ADD_10:%.*]] = add nsw i32 undef, [[ADD_9]] +; STORE-NEXT: [[ADD_11:%.*]] = add nsw i32 undef, [[ADD_10]] +; STORE-NEXT: [[ADD_12:%.*]] = add nsw i32 undef, [[ADD_11]] +; STORE-NEXT: [[ADD_13:%.*]] = add nsw i32 undef, [[ADD_12]] +; STORE-NEXT: [[ADD_14:%.*]] = add nsw i32 undef, [[ADD_13]] +; STORE-NEXT: [[ADD_15:%.*]] = add nsw i32 undef, [[ADD_14]] +; STORE-NEXT: [[ADD_16:%.*]] = add nsw i32 undef, [[ADD_15]] +; STORE-NEXT: [[ADD_17:%.*]] = add nsw i32 undef, [[ADD_16]] +; STORE-NEXT: [[ADD_18:%.*]] = add nsw i32 undef, [[ADD_17]] +; STORE-NEXT: [[ADD_19:%.*]] = add nsw i32 undef, [[ADD_18]] +; STORE-NEXT: [[ADD_20:%.*]] = add nsw i32 undef, [[ADD_19]] +; STORE-NEXT: [[ADD_21:%.*]] = add nsw i32 undef, [[ADD_20]] +; STORE-NEXT: [[ADD_22:%.*]] = add nsw i32 undef, [[ADD_21]] +; STORE-NEXT: [[ADD_23:%.*]] = add nsw i32 undef, [[ADD_22]] +; STORE-NEXT: [[ADD_24:%.*]] = add nsw i32 undef, [[ADD_23]] +; STORE-NEXT: [[ADD_25:%.*]] = add nsw i32 undef, [[ADD_24]] +; STORE-NEXT: [[ADD_26:%.*]] = add nsw i32 undef, [[ADD_25]] +; STORE-NEXT: [[ADD_27:%.*]] = add nsw i32 undef, [[ADD_26]] +; STORE-NEXT: [[ADD_28:%.*]] = add nsw i32 undef, [[ADD_27]] +; STORE-NEXT: [[ADD_29:%.*]] = add nsw i32 undef, [[ADD_28]] ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> undef, <32 x i32> ; STORE-NEXT: [[BIN_RDX:%.*]] = add nsw <32 x i32> [[TMP0]], [[RDX_SHUF]] ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x i32> [[BIN_RDX]], <32 x i32> undef, <32 x i32> @@ -1553,6 +1672,7 @@ define void @i32_red_example32(i32* %res) { ; STORE-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x i32> [[BIN_RDX6]], <32 x i32> undef, <32 x i32> ; STORE-NEXT: [[BIN_RDX8:%.*]] = add nsw <32 x i32> [[BIN_RDX6]], [[RDX_SHUF7]] ; STORE-NEXT: [[TMP1:%.*]] = extractelement <32 x i32> [[BIN_RDX8]], i32 0 +; STORE-NEXT: [[ADD_30:%.*]] = add nsw i32 undef, [[ADD_29]] ; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16 ; STORE-NEXT: ret void ; @@ -1630,6 +1750,12 @@ define void @i32_red_call(i32 %val) { ; CHECK-LABEL: @i32_red_call( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef +; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]] +; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]] +; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]] +; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]] +; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> @@ -1637,12 +1763,19 @@ define void @i32_red_call(i32 %val) { ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 +; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]] ; CHECK-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]]) ; CHECK-NEXT: ret void ; ; STORE-LABEL: @i32_red_call( ; STORE-NEXT: entry: ; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 +; STORE-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef +; STORE-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]] +; STORE-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]] +; STORE-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]] +; STORE-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]] +; STORE-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]] ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32> ; STORE-NEXT: [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]] ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> @@ -1650,6 +1783,7 @@ define void @i32_red_call(i32 %val) { ; STORE-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> ; STORE-NEXT: [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] ; STORE-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 +; STORE-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]] ; STORE-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]]) ; STORE-NEXT: ret void ; @@ -1677,6 +1811,12 @@ define void @i32_red_invoke(i32 %val) personality i32 (...)* @__gxx_personality_ ; CHECK-LABEL: @i32_red_invoke( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef +; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]] +; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]] +; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]] +; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]] +; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> @@ -1684,6 +1824,7 @@ define void @i32_red_invoke(i32 %val) personality i32 (...)* @__gxx_personality_ ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 +; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]] ; CHECK-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]]) ; CHECK-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]] ; CHECK: exception: @@ -1696,6 +1837,12 @@ define void @i32_red_invoke(i32 %val) personality i32 (...)* @__gxx_personality_ ; STORE-LABEL: @i32_red_invoke( ; STORE-NEXT: entry: ; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 +; STORE-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef +; STORE-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]] +; STORE-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]] +; STORE-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]] +; STORE-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]] +; STORE-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]] ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32> ; STORE-NEXT: [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]] ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> @@ -1703,6 +1850,7 @@ define void @i32_red_invoke(i32 %val) personality i32 (...)* @__gxx_personality_ ; STORE-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> ; STORE-NEXT: [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] ; STORE-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 +; STORE-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]] ; STORE-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]]) ; STORE-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]] ; STORE: exception: diff --git a/test/Transforms/SLPVectorizer/X86/long_chains.ll b/test/Transforms/SLPVectorizer/X86/long_chains.ll index 52c1c6bee12..ffbdd9f1d14 100644 --- a/test/Transforms/SLPVectorizer/X86/long_chains.ll +++ b/test/Transforms/SLPVectorizer/X86/long_chains.ll @@ -12,10 +12,10 @@ define i32 @test(double* nocapture %A, i8* nocapture %B) { ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[B:%.*]] to <2 x i8>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* [[TMP0]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i8> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i8> undef, i8 [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i8> [[TMP5]], i8 [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i8> undef, i8 [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i8> [[TMP4]], i8 [[TMP5]], i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = sitofp <2 x i8> [[TMP6]] to <2 x double> ; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP8]], diff --git a/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll b/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll index 0b8c72b35a2..13bbe5e5798 100644 --- a/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll +++ b/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll @@ -5,6 +5,36 @@ define signext i8 @Foo(<32 x i8>* %__v) { ; CHECK-LABEL: @Foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <32 x i8>, <32 x i8>* [[__V:%.*]], align 32 +; CHECK-NEXT: [[ADD_I_1_I:%.*]] = add i8 undef, undef +; CHECK-NEXT: [[ADD_I_2_I:%.*]] = add i8 [[ADD_I_1_I]], undef +; CHECK-NEXT: [[ADD_I_3_I:%.*]] = add i8 [[ADD_I_2_I]], undef +; CHECK-NEXT: [[ADD_I_4_I:%.*]] = add i8 [[ADD_I_3_I]], undef +; CHECK-NEXT: [[ADD_I_5_I:%.*]] = add i8 [[ADD_I_4_I]], undef +; CHECK-NEXT: [[ADD_I_6_I:%.*]] = add i8 [[ADD_I_5_I]], undef +; CHECK-NEXT: [[ADD_I_7_I:%.*]] = add i8 [[ADD_I_6_I]], undef +; CHECK-NEXT: [[ADD_I_8_I:%.*]] = add i8 [[ADD_I_7_I]], undef +; CHECK-NEXT: [[ADD_I_9_I:%.*]] = add i8 [[ADD_I_8_I]], undef +; CHECK-NEXT: [[ADD_I_10_I:%.*]] = add i8 [[ADD_I_9_I]], undef +; CHECK-NEXT: [[ADD_I_11_I:%.*]] = add i8 [[ADD_I_10_I]], undef +; CHECK-NEXT: [[ADD_I_12_I:%.*]] = add i8 [[ADD_I_11_I]], undef +; CHECK-NEXT: [[ADD_I_13_I:%.*]] = add i8 [[ADD_I_12_I]], undef +; CHECK-NEXT: [[ADD_I_14_I:%.*]] = add i8 [[ADD_I_13_I]], undef +; CHECK-NEXT: [[ADD_I_15_I:%.*]] = add i8 [[ADD_I_14_I]], undef +; CHECK-NEXT: [[ADD_I_16_I:%.*]] = add i8 [[ADD_I_15_I]], undef +; CHECK-NEXT: [[ADD_I_17_I:%.*]] = add i8 [[ADD_I_16_I]], undef +; CHECK-NEXT: [[ADD_I_18_I:%.*]] = add i8 [[ADD_I_17_I]], undef +; CHECK-NEXT: [[ADD_I_19_I:%.*]] = add i8 [[ADD_I_18_I]], undef +; CHECK-NEXT: [[ADD_I_20_I:%.*]] = add i8 [[ADD_I_19_I]], undef +; CHECK-NEXT: [[ADD_I_21_I:%.*]] = add i8 [[ADD_I_20_I]], undef +; CHECK-NEXT: [[ADD_I_22_I:%.*]] = add i8 [[ADD_I_21_I]], undef +; CHECK-NEXT: [[ADD_I_23_I:%.*]] = add i8 [[ADD_I_22_I]], undef +; CHECK-NEXT: [[ADD_I_24_I:%.*]] = add i8 [[ADD_I_23_I]], undef +; CHECK-NEXT: [[ADD_I_25_I:%.*]] = add i8 [[ADD_I_24_I]], undef +; CHECK-NEXT: [[ADD_I_26_I:%.*]] = add i8 [[ADD_I_25_I]], undef +; CHECK-NEXT: [[ADD_I_27_I:%.*]] = add i8 [[ADD_I_26_I]], undef +; CHECK-NEXT: [[ADD_I_28_I:%.*]] = add i8 [[ADD_I_27_I]], undef +; CHECK-NEXT: [[ADD_I_29_I:%.*]] = add i8 [[ADD_I_28_I]], undef +; CHECK-NEXT: [[ADD_I_30_I:%.*]] = add i8 [[ADD_I_29_I]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i8> [[TMP0]], <32 x i8> undef, <32 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <32 x i8> [[TMP0]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x i8> [[BIN_RDX]], <32 x i8> undef, <32 x i32> @@ -16,6 +46,7 @@ define signext i8 @Foo(<32 x i8>* %__v) { ; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x i8> [[BIN_RDX6]], <32 x i8> undef, <32 x i32> ; CHECK-NEXT: [[BIN_RDX8:%.*]] = add <32 x i8> [[BIN_RDX6]], [[RDX_SHUF7]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <32 x i8> [[BIN_RDX8]], i32 0 +; CHECK-NEXT: [[ADD_I_31_I:%.*]] = add i8 [[ADD_I_30_I]], undef ; CHECK-NEXT: ret i8 [[TMP1]] ; entry: diff --git a/test/Transforms/SLPVectorizer/X86/reduction_loads.ll b/test/Transforms/SLPVectorizer/X86/reduction_loads.ll index 15ede8156fa..56539ab928e 100644 --- a/test/Transforms/SLPVectorizer/X86/reduction_loads.ll +++ b/test/Transforms/SLPVectorizer/X86/reduction_loads.ll @@ -35,6 +35,13 @@ define i32 @test(i32* nocapture readonly %p) { ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> [[TMP1]], +; CHECK-NEXT: [[ADD:%.*]] = add i32 undef, [[SUM]] +; CHECK-NEXT: [[ADD_1:%.*]] = add i32 undef, [[ADD]] +; CHECK-NEXT: [[ADD_2:%.*]] = add i32 undef, [[ADD_1]] +; CHECK-NEXT: [[ADD_3:%.*]] = add i32 undef, [[ADD_2]] +; CHECK-NEXT: [[ADD_4:%.*]] = add i32 undef, [[ADD_3]] +; CHECK-NEXT: [[ADD_5:%.*]] = add i32 undef, [[ADD_4]] +; CHECK-NEXT: [[ADD_6:%.*]] = add i32 undef, [[ADD_5]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP2]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> @@ -43,6 +50,7 @@ define i32 @test(i32* nocapture readonly %p) { ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 ; CHECK-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], [[SUM]] +; CHECK-NEXT: [[ADD_7:%.*]] = add i32 undef, [[ADD_6]] ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 [[OP_EXTRA]] @@ -130,6 +138,13 @@ define i32 @test2(i32* nocapture readonly %p, i32* nocapture readonly %q) { ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[Q]] to <8 x i32>* ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[ADD:%.*]] = add i32 undef, [[SUM]] +; CHECK-NEXT: [[ADD_1:%.*]] = add i32 undef, [[ADD]] +; CHECK-NEXT: [[ADD_2:%.*]] = add i32 undef, [[ADD_1]] +; CHECK-NEXT: [[ADD_3:%.*]] = add i32 undef, [[ADD_2]] +; CHECK-NEXT: [[ADD_4:%.*]] = add i32 undef, [[ADD_3]] +; CHECK-NEXT: [[ADD_5:%.*]] = add i32 undef, [[ADD_4]] +; CHECK-NEXT: [[ADD_6:%.*]] = add i32 undef, [[ADD_5]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP4]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> @@ -138,6 +153,7 @@ define i32 @test2(i32* nocapture readonly %p, i32* nocapture readonly %q) { ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 ; CHECK-NEXT: [[OP_EXTRA]] = add i32 [[TMP5]], [[SUM]] +; CHECK-NEXT: [[ADD_7:%.*]] = add i32 undef, [[ADD_6]] ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 [[OP_EXTRA]] @@ -242,6 +258,13 @@ define i32 @test3(i32* nocapture readonly %p, i32* nocapture readonly %q) { ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[Q]] to <8 x i32>* ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> [[REORDER_SHUFFLE]], [[TMP3]] +; CHECK-NEXT: [[ADD:%.*]] = add i32 undef, [[SUM]] +; CHECK-NEXT: [[ADD_1:%.*]] = add i32 undef, [[ADD]] +; CHECK-NEXT: [[ADD_2:%.*]] = add i32 undef, [[ADD_1]] +; CHECK-NEXT: [[ADD_3:%.*]] = add i32 undef, [[ADD_2]] +; CHECK-NEXT: [[ADD_4:%.*]] = add i32 undef, [[ADD_3]] +; CHECK-NEXT: [[ADD_5:%.*]] = add i32 undef, [[ADD_4]] +; CHECK-NEXT: [[ADD_6:%.*]] = add i32 undef, [[ADD_5]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP4]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> @@ -250,6 +273,7 @@ define i32 @test3(i32* nocapture readonly %p, i32* nocapture readonly %q) { ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 ; CHECK-NEXT: [[OP_EXTRA]] = add i32 [[TMP5]], [[SUM]] +; CHECK-NEXT: [[ADD_7:%.*]] = add i32 undef, [[ADD_6]] ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 [[OP_EXTRA]] diff --git a/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll b/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll index 7fdc9539223..4dd40876a70 100644 --- a/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll +++ b/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll @@ -26,6 +26,12 @@ define i32 @test_add(i32* nocapture readonly %p) { ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: [[MUL_18:%.*]] = add i32 undef, undef +; CHECK-NEXT: [[MUL_29:%.*]] = add i32 undef, [[MUL_18]] +; CHECK-NEXT: [[MUL_310:%.*]] = add i32 undef, [[MUL_29]] +; CHECK-NEXT: [[MUL_411:%.*]] = add i32 undef, [[MUL_310]] +; CHECK-NEXT: [[MUL_512:%.*]] = add i32 undef, [[MUL_411]] +; CHECK-NEXT: [[MUL_613:%.*]] = add i32 undef, [[MUL_512]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> @@ -33,6 +39,7 @@ define i32 @test_add(i32* nocapture readonly %p) { ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 +; CHECK-NEXT: [[MUL_714:%.*]] = add i32 undef, [[MUL_613]] ; CHECK-NEXT: ret i32 [[TMP2]] ; entry: @@ -140,6 +147,12 @@ define i32 @test_and(i32* nocapture readonly %p) { ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: [[MUL_18:%.*]] = and i32 undef, undef +; CHECK-NEXT: [[MUL_29:%.*]] = and i32 undef, [[MUL_18]] +; CHECK-NEXT: [[MUL_310:%.*]] = and i32 undef, [[MUL_29]] +; CHECK-NEXT: [[MUL_411:%.*]] = and i32 undef, [[MUL_310]] +; CHECK-NEXT: [[MUL_512:%.*]] = and i32 undef, [[MUL_411]] +; CHECK-NEXT: [[MUL_613:%.*]] = and i32 undef, [[MUL_512]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = and <8 x i32> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> @@ -147,6 +160,7 @@ define i32 @test_and(i32* nocapture readonly %p) { ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 +; CHECK-NEXT: [[MUL_714:%.*]] = and i32 undef, [[MUL_613]] ; CHECK-NEXT: ret i32 [[TMP2]] ; entry: @@ -194,6 +208,12 @@ define i32 @test_or(i32* nocapture readonly %p) { ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: [[MUL_18:%.*]] = or i32 undef, undef +; CHECK-NEXT: [[MUL_29:%.*]] = or i32 undef, [[MUL_18]] +; CHECK-NEXT: [[MUL_310:%.*]] = or i32 undef, [[MUL_29]] +; CHECK-NEXT: [[MUL_411:%.*]] = or i32 undef, [[MUL_310]] +; CHECK-NEXT: [[MUL_512:%.*]] = or i32 undef, [[MUL_411]] +; CHECK-NEXT: [[MUL_613:%.*]] = or i32 undef, [[MUL_512]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <8 x i32> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> @@ -201,6 +221,7 @@ define i32 @test_or(i32* nocapture readonly %p) { ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = or <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 +; CHECK-NEXT: [[MUL_714:%.*]] = or i32 undef, [[MUL_613]] ; CHECK-NEXT: ret i32 [[TMP2]] ; entry: @@ -248,6 +269,12 @@ define i32 @test_xor(i32* nocapture readonly %p) { ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: [[MUL_18:%.*]] = xor i32 undef, undef +; CHECK-NEXT: [[MUL_29:%.*]] = xor i32 undef, [[MUL_18]] +; CHECK-NEXT: [[MUL_310:%.*]] = xor i32 undef, [[MUL_29]] +; CHECK-NEXT: [[MUL_411:%.*]] = xor i32 undef, [[MUL_310]] +; CHECK-NEXT: [[MUL_512:%.*]] = xor i32 undef, [[MUL_411]] +; CHECK-NEXT: [[MUL_613:%.*]] = xor i32 undef, [[MUL_512]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = xor <8 x i32> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> @@ -255,6 +282,7 @@ define i32 @test_xor(i32* nocapture readonly %p) { ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = xor <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 +; CHECK-NEXT: [[MUL_714:%.*]] = xor i32 undef, [[MUL_613]] ; CHECK-NEXT: ret i32 [[TMP2]] ; entry: @@ -294,12 +322,15 @@ define i32 @PR37731(<4 x i32>* noalias nocapture dereferenceable(16) %self) unna ; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i32> [[TMP4]], ; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i32> [[TMP3]], [[TMP5]] ; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[SELF]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = xor i32 undef, undef +; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[TMP7]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = xor <4 x i32> [[TMP6]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = xor <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0 -; CHECK-NEXT: ret i32 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], undef +; CHECK-NEXT: ret i32 [[TMP9]] ; entry: %0 = load <4 x i32>, <4 x i32>* %self, align 16 diff --git a/test/Transforms/SLPVectorizer/X86/remark_horcost.ll b/test/Transforms/SLPVectorizer/X86/remark_horcost.ll index 4a69a35420b..27997f6af3d 100644 --- a/test/Transforms/SLPVectorizer/X86/remark_horcost.ll +++ b/test/Transforms/SLPVectorizer/X86/remark_horcost.ll @@ -33,8 +33,11 @@ define i32 @foo(i32* %diff) #0 { ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[ARRAYIDX2]] to <4 x i32>* ; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP11]], align 4 ; CHECK-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> [[TMP12]], [[TMP9]] +; CHECK-NEXT: [[ADD10:%.*]] = add nsw i32 undef, [[A_088]] ; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 1 +; CHECK-NEXT: [[ADD24:%.*]] = add nsw i32 [[ADD10]], undef ; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 2 +; CHECK-NEXT: [[ADD38:%.*]] = add nsw i32 [[ADD24]], undef ; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 3 ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[ARRAYIDX6]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* [[TMP14]], align 16 @@ -44,6 +47,7 @@ define i32 @foo(i32* %diff) #0 { ; CHECK-NEXT: [[BIN_RDX2:%.*]] = add nsw <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]] ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0 ; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP15]], [[A_088]] +; CHECK-NEXT: [[ADD52:%.*]] = add nsw i32 [[ADD38]], undef ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] diff --git a/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll b/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll index 768cba9c37b..13884efd98d 100644 --- a/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll +++ b/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll @@ -19,6 +19,11 @@ define void @hoge() { ; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP3]], undef ; CHECK-NEXT: [[SHUFFLE8:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> undef, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[SHUFFLE8]], +; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 undef, undef +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 undef, i32 undef +; CHECK-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], undef +; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef +; CHECK-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], undef ; CHECK-NEXT: [[RDX_SHUF9:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP10:%.*]] = icmp sgt <4 x i32> [[TMP5]], [[RDX_SHUF9]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT11:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP10]], <4 x i32> [[TMP5]], <4 x i32> [[RDX_SHUF9]] @@ -26,12 +31,28 @@ define void @hoge() { ; CHECK-NEXT: [[RDX_MINMAX_CMP13:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT11]], [[RDX_SHUF12]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT14:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP13]], <4 x i32> [[RDX_MINMAX_SELECT11]], <4 x i32> [[RDX_SHUF12]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT14]], i32 0 +; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef ; CHECK-NEXT: [[TMP19:%.*]] = select i1 undef, i32 [[TMP6]], i32 undef ; CHECK-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], 63 ; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <2 x i32> undef, [[TMP2]] ; CHECK-NEXT: [[TMP8:%.*]] = sub <2 x i32> [[TMP7]], undef ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP26:%.*]] = icmp sgt i32 undef, undef +; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 undef, i32 undef +; CHECK-NEXT: [[TMP28:%.*]] = icmp sgt i32 [[TMP27]], undef +; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 undef, i32 [[TMP27]] +; CHECK-NEXT: [[TMP31:%.*]] = icmp sgt i32 undef, undef +; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 undef, i32 undef +; CHECK-NEXT: [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], [[TMP29]] +; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP29]], i32 [[TMP32]] +; CHECK-NEXT: [[TMP36:%.*]] = icmp sgt i32 undef, undef +; CHECK-NEXT: [[TMP37:%.*]] = select i1 [[TMP36]], i32 undef, i32 undef +; CHECK-NEXT: [[TMP38:%.*]] = icmp sgt i32 [[TMP37]], [[TMP34]] +; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP34]], i32 [[TMP37]] +; CHECK-NEXT: [[TMP41:%.*]] = icmp sgt i32 undef, undef +; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 undef, i32 undef +; CHECK-NEXT: [[TMP43:%.*]] = icmp sgt i32 [[TMP42]], [[TMP39]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP9]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP9]], <4 x i32> [[RDX_SHUF]] @@ -49,6 +70,7 @@ define void @hoge() { ; CHECK-NEXT: [[OP_EXTRA6:%.*]] = select i1 [[TMP14]], i32 [[OP_EXTRA5]], i32 undef ; CHECK-NEXT: [[TMP15:%.*]] = icmp slt i32 [[OP_EXTRA6]], undef ; CHECK-NEXT: [[OP_EXTRA7:%.*]] = select i1 [[TMP15]], i32 [[OP_EXTRA6]], i32 undef +; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP39]], i32 [[TMP42]] ; CHECK-NEXT: [[TMP45:%.*]] = icmp sgt i32 undef, [[OP_EXTRA7]] ; CHECK-NEXT: unreachable ; diff --git a/test/Transforms/SLPVectorizer/X86/undef_vect.ll b/test/Transforms/SLPVectorizer/X86/undef_vect.ll index 822c362ec52..7ecd5805f93 100644 --- a/test/Transforms/SLPVectorizer/X86/undef_vect.ll +++ b/test/Transforms/SLPVectorizer/X86/undef_vect.ll @@ -16,6 +16,15 @@ define void @_Z2azv() local_unnamed_addr { ; CHECK-NEXT: [[DOTSROA_RAW_IDX_7:%.*]] = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76"* undef, i64 7, i32 1 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTSROA_CAST_4]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: [[CMP_I1_4:%.*]] = icmp slt i32 undef, undef +; CHECK-NEXT: [[DOTSROA_SPECULATED_4:%.*]] = select i1 [[CMP_I1_4]], i32 undef, i32 undef +; CHECK-NEXT: [[CMP_I1_5:%.*]] = icmp slt i32 [[DOTSROA_SPECULATED_4]], undef +; CHECK-NEXT: [[DOTSROA_SPECULATED_5:%.*]] = select i1 [[CMP_I1_5]], i32 undef, i32 [[DOTSROA_SPECULATED_4]] +; CHECK-NEXT: [[CMP_I1_6:%.*]] = icmp slt i32 [[DOTSROA_SPECULATED_5]], undef +; CHECK-NEXT: [[DOTSROA_SPECULATED_6:%.*]] = select i1 [[CMP_I1_6]], i32 undef, i32 [[DOTSROA_SPECULATED_5]] +; CHECK-NEXT: [[CMP_I1_7:%.*]] = icmp slt i32 [[DOTSROA_SPECULATED_6]], undef +; CHECK-NEXT: [[DOTSROA_SPECULATED_7:%.*]] = select i1 [[CMP_I1_7]], i32 undef, i32 [[DOTSROA_SPECULATED_6]] +; CHECK-NEXT: [[CMP_I1_8:%.*]] = icmp slt i32 undef, undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP1]], <8 x i32> [[RDX_SHUF]] @@ -30,6 +39,7 @@ define void @_Z2azv() local_unnamed_addr { ; CHECK-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 undef ; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[OP_EXTRA]], undef ; CHECK-NEXT: [[OP_EXTRA7:%.*]] = select i1 [[TMP4]], i32 [[OP_EXTRA]], i32 undef +; CHECK-NEXT: [[DOTSROA_SPECULATED_8:%.*]] = select i1 [[CMP_I1_8]], i32 undef, i32 undef ; CHECK-NEXT: [[DOTSROA_SPECULATED_9:%.*]] = select i1 undef, i32 undef, i32 [[OP_EXTRA7]] ; CHECK-NEXT: [[CMP_I1_10:%.*]] = icmp slt i32 [[DOTSROA_SPECULATED_9]], undef ; CHECK-NEXT: ret void diff --git a/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll b/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll index 779d5260506..889bba80b7a 100644 --- a/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll +++ b/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll @@ -18,6 +18,19 @@ define i32 @foo(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7 ; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 undef, undef +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef +; CHECK-NEXT: [[CMP15:%.*]] = icmp ult i32 [[COND]], undef +; CHECK-NEXT: [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32 undef +; CHECK-NEXT: [[CMP20:%.*]] = icmp ult i32 [[COND19]], undef +; CHECK-NEXT: [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]], i32 undef +; CHECK-NEXT: [[CMP25:%.*]] = icmp ult i32 [[COND24]], undef +; CHECK-NEXT: [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]], i32 undef +; CHECK-NEXT: [[CMP30:%.*]] = icmp ult i32 [[COND29]], undef +; CHECK-NEXT: [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]], i32 undef +; CHECK-NEXT: [[CMP35:%.*]] = icmp ult i32 [[COND34]], undef +; CHECK-NEXT: [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]], i32 undef +; CHECK-NEXT: [[CMP40:%.*]] = icmp ult i32 [[COND39]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i32> [[TMP10]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP10]], <8 x i32> [[RDX_SHUF]] @@ -28,6 +41,7 @@ define i32 @foo(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4 ; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]] ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 +; CHECK-NEXT: [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]], i32 undef ; CHECK-NEXT: ret i32 [[TMP11]] ; entry: @@ -78,6 +92,19 @@ define i32 @foo1(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7 ; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 undef, undef +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef +; CHECK-NEXT: [[CMP15:%.*]] = icmp ult i32 [[COND]], undef +; CHECK-NEXT: [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32 undef +; CHECK-NEXT: [[CMP20:%.*]] = icmp ult i32 [[COND19]], undef +; CHECK-NEXT: [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]], i32 undef +; CHECK-NEXT: [[CMP25:%.*]] = icmp ult i32 [[COND24]], undef +; CHECK-NEXT: [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]], i32 undef +; CHECK-NEXT: [[CMP30:%.*]] = icmp ult i32 [[COND29]], undef +; CHECK-NEXT: [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]], i32 undef +; CHECK-NEXT: [[CMP35:%.*]] = icmp ult i32 [[COND34]], undef +; CHECK-NEXT: [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]], i32 undef +; CHECK-NEXT: [[CMP40:%.*]] = icmp ult i32 [[COND39]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i32> [[TMP10]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP10]], <8 x i32> [[RDX_SHUF]] @@ -88,6 +115,7 @@ define i32 @foo1(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a ; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]] ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 +; CHECK-NEXT: [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]], i32 undef ; CHECK-NEXT: ret i32 [[TMP11]] ; entry: @@ -142,6 +170,19 @@ define i32 @foo2(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7 ; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 undef, undef +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef +; CHECK-NEXT: [[CMP15:%.*]] = icmp ult i32 [[COND]], undef +; CHECK-NEXT: [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32 undef +; CHECK-NEXT: [[CMP20:%.*]] = icmp ult i32 [[COND19]], undef +; CHECK-NEXT: [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]], i32 undef +; CHECK-NEXT: [[CMP25:%.*]] = icmp ult i32 [[COND24]], undef +; CHECK-NEXT: [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]], i32 undef +; CHECK-NEXT: [[CMP30:%.*]] = icmp ult i32 [[COND29]], undef +; CHECK-NEXT: [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]], i32 undef +; CHECK-NEXT: [[CMP35:%.*]] = icmp ult i32 [[COND34]], undef +; CHECK-NEXT: [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]], i32 undef +; CHECK-NEXT: [[CMP40:%.*]] = icmp ult i32 [[COND39]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i32> [[TMP10]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP10]], <8 x i32> [[RDX_SHUF]] @@ -152,6 +193,7 @@ define i32 @foo2(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a ; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]] ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 +; CHECK-NEXT: [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]], i32 undef ; CHECK-NEXT: ret i32 [[TMP11]] ; entry: