From: Alexey Bataev Date: Wed, 1 Mar 2017 12:22:33 +0000 (+0000) Subject: [SLP] Preserve IR flags for extra args. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b25d8fc2fa2a7509aab1d7e83fbbdc4e57c0d2c0;p=llvm [SLP] Preserve IR flags for extra args. Summary: We should preserve IR flags for extra args. These IR flags should be taken from original scalar operations, not from the reduction operations. Reviewers: mkuper, mzolotukhin, hfinkel Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D30447 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@296613 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index c6d26abc76a..de18a654046 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -212,14 +212,14 @@ static unsigned getSameOpcode(ArrayRef VL) { /// Flag set: NSW, NUW, exact, and all of fast-math. static void propagateIRFlags(Value *I, ArrayRef VL) { if (auto *VecOp = dyn_cast(I)) { - if (auto *Intersection = dyn_cast(VL[0])) { - // Intersection is initialized to the 0th scalar, - // so start counting from index '1'. + if (auto *I0 = dyn_cast(VL[0])) { + // VecOp is initialized to the 0th scalar, so start counting from index + // '1'. 
+ VecOp->copyIRFlags(I0); for (int i = 1, e = VL.size(); i < e; ++i) { if (auto *Scalar = dyn_cast(VL[i])) - Intersection->andIRFlags(Scalar); + VecOp->andIRFlags(Scalar); } - VecOp->copyIRFlags(Intersection); } } } @@ -304,7 +304,8 @@ public: typedef SmallVector InstrList; typedef SmallPtrSet ValueSet; typedef SmallVector StoreList; - typedef MapVector> ExtraValueToDebugLocsMap; + typedef MapVector> + ExtraValueToDebugLocsMap; BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li, @@ -4430,7 +4431,7 @@ public: // The same extra argument may be used several time, so log each attempt // to use it. for (auto &Pair : ExtraArgs) - ExternallyUsedValues[Pair.second].push_back(Pair.first->getDebugLoc()); + ExternallyUsedValues[Pair.second].push_back(Pair.first); while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) { auto VL = makeArrayRef(&ReducedVals[i], ReduxWidth); V.buildTree(VL, ExternallyUsedValues, ReductionOps); @@ -4481,10 +4482,11 @@ public: assert(!Pair.second.empty() && "At least one DebugLoc must be inserted"); // Add each externally used value to the final reduction. - for (auto &DL : Pair.second) { - Builder.SetCurrentDebugLocation(DL); + for (auto *I : Pair.second) { + Builder.SetCurrentDebugLocation(I->getDebugLoc()); VectorizedTree = Builder.CreateBinOp(ReductionOpcode, VectorizedTree, Pair.first, "bin.extra"); + propagateIRFlags(VectorizedTree, I); } } // Update users. 
diff --git a/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/test/Transforms/SLPVectorizer/X86/horizontal-list.ll index 3553bfbd829..73844037f12 100644 --- a/test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ b/test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -1679,8 +1679,8 @@ define i32 @wobble(i32 %arg, i32 %bar) { ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]] ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0 -; CHECK-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP12]], [[ARG]] -; CHECK-NEXT: [[BIN_EXTRA3:%.*]] = add i32 [[BIN_EXTRA]], [[TMP9]] +; CHECK-NEXT: [[BIN_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]] +; CHECK-NEXT: [[BIN_EXTRA3:%.*]] = add nsw i32 [[BIN_EXTRA]], [[TMP9]] ; CHECK-NEXT: [[R5:%.*]] = add nsw i32 [[R4]], undef ; CHECK-NEXT: ret i32 [[BIN_EXTRA3]] ; @@ -1707,8 +1707,8 @@ define i32 @wobble(i32 %arg, i32 %bar) { ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> ; THRESHOLD-NEXT: [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]] ; THRESHOLD-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0 -; THRESHOLD-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP12]], [[ARG]] -; THRESHOLD-NEXT: [[BIN_EXTRA3:%.*]] = add i32 [[BIN_EXTRA]], [[TMP9]] +; THRESHOLD-NEXT: [[BIN_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]] +; THRESHOLD-NEXT: [[BIN_EXTRA3:%.*]] = add nsw i32 [[BIN_EXTRA]], [[TMP9]] ; THRESHOLD-NEXT: [[R5:%.*]] = add nsw i32 [[R4]], undef ; THRESHOLD-NEXT: ret i32 [[BIN_EXTRA3]] ; diff --git a/test/Transforms/SLPVectorizer/X86/scheduling.ll b/test/Transforms/SLPVectorizer/X86/scheduling.ll index a01646b9dbe..8395401c5df 100644 --- a/test/Transforms/SLPVectorizer/X86/scheduling.ll +++ b/test/Transforms/SLPVectorizer/X86/scheduling.ll @@ -12,7 +12,7 @@ define i32 @foo(i32* nocapture readonly %diff) #0 { ; CHECK-NEXT: 
[[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]] ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0 -; CHECK: [[ADD52:%.*]] = add i32 [[TMP15]], +; CHECK: [[ADD52:%.*]] = add nsw i32 [[TMP15]], ; CHECK: ret i32 [[ADD52]] ; entry: