[SLP] Preserve IR flags when vectorizing horizontal reductions.

author Alexey Bataev <a.bataev@hotmail.com>

Wed, 1 Mar 2017 12:43:39 +0000 (12:43 +0000)

committer Alexey Bataev <a.bataev@hotmail.com>

Wed, 1 Mar 2017 12:43:39 +0000 (12:43 +0000)
author Alexey Bataev <a.bataev@hotmail.com>
Wed, 1 Mar 2017 12:43:39 +0000 (12:43 +0000)
committer Alexey Bataev <a.bataev@hotmail.com>
Wed, 1 Mar 2017 12:43:39 +0000 (12:43 +0000)
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp

index de18a6540465a432917932f819e6c7b7de4f5ece..b0c70285dd0afb07130ca70af002076985f64081 100644 (file)
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4459,11 +4459,12 @@ public:
  
        // Emit a reduction.
        Value *ReducedSubTree =
-          emitReduction(VectorizedRoot, Builder, ReduxWidth);
+          emitReduction(VectorizedRoot, Builder, ReduxWidth, ReductionOps);
        if (VectorizedTree) {
          Builder.SetCurrentDebugLocation(Loc);
          VectorizedTree = Builder.CreateBinOp(ReductionOpcode, VectorizedTree,
                                               ReducedSubTree, "bin.rdx");
+        propagateIRFlags(VectorizedTree, ReductionOps);
        } else
          VectorizedTree = ReducedSubTree;
        i += ReduxWidth;
@@ -4477,6 +4478,7 @@ public:
          Builder.SetCurrentDebugLocation(I->getDebugLoc());
          VectorizedTree =
              Builder.CreateBinOp(ReductionOpcode, VectorizedTree, I);
+        propagateIRFlags(VectorizedTree, ReductionOps);
        }
        for (auto &Pair : ExternallyUsedValues) {
          assert(!Pair.second.empty() &&
@@ -4527,7 +4529,7 @@ private:
  
    /// \brief Emit a horizontal reduction of the vectorized value.
    Value *emitReduction(Value *VectorizedValue, IRBuilder<> &Builder,
-                       unsigned ReduxWidth) {
+                       unsigned ReduxWidth, ArrayRef<Value *> RedOps) {
      assert(VectorizedValue && "Need to have a vectorized tree node");
      assert(isPowerOf2_32(ReduxWidth) &&
             "We only handle power-of-two reductions for now");
@@ -4554,6 +4556,7 @@ private:
            TmpVec, UndefValue::get(TmpVec->getType()), UpperHalf, "rdx.shuf");
          TmpVec = Builder.CreateBinOp(ReductionOpcode, TmpVec, Shuf, "bin.rdx");
        }
+      propagateIRFlags(TmpVec, RedOps);
      }
  
      // The result is in the first element of the vector.
diff --git a/test/Transforms/SLPVectorizer/X86/horizontal.ll b/test/Transforms/SLPVectorizer/X86/horizontal.ll

index 715da3e3abf902c995db8cf3a104810be82680ff..080f850f91cffb7664e367c26473c5b15d6d1f92 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/horizontal.ll
+++ b/test/Transforms/SLPVectorizer/X86/horizontal.ll
@@ -624,9 +624,9 @@ define void @i32_red_example4(i32* %res) {
  ; STORE-LABEL: @i32_red_example4(
  ; STORE:         [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr_i32 to <4 x i32>*), align 16
  ; STORE:         [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
-; STORE-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP0]], [[RDX_SHUF]]
+; STORE-NEXT:    [[BIN_RDX:%.*]] = add nsw <4 x i32> [[TMP0]], [[RDX_SHUF]]
  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-; STORE-NEXT:    [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
+; STORE-NEXT:    [[BIN_RDX2:%.*]] = add nsw <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
  ; STORE:         store i32 [[TMP1]], i32* %res, align 16
  ; STORE-NEXT:    ret void
@@ -647,11 +647,11 @@ define void @i32_red_example8(i32* %res) {
  ; STORE-LABEL: @i32_red_example8(
  ; STORE:         [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
  ; STORE:         [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
-; STORE-NEXT:    [[BIN_RDX:%.*]] = add <8 x i32> [[TMP0]], [[RDX_SHUF]]
+; STORE-NEXT:    [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]]
  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; STORE-NEXT:    [[BIN_RDX2:%.*]] = add <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
+; STORE-NEXT:    [[BIN_RDX2:%.*]] = add nsw <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
  ; STORE-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; STORE-NEXT:    [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
+; STORE-NEXT:    [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
  ; STORE:         store i32 [[TMP1]], i32* %res, align 16
  ; STORE-NEXT:    ret void
@@ -680,13 +680,13 @@ define void @i32_red_example16(i32* %res) {
  ; STORE-LABEL: @i32_red_example16(
  ; STORE:         [[TMP0:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr_i32 to <16 x i32>*), align 16
  ; STORE:         [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; STORE-NEXT:    [[BIN_RDX:%.*]] = add <16 x i32> [[TMP0]], [[RDX_SHUF]]
+; STORE-NEXT:    [[BIN_RDX:%.*]] = add nsw <16 x i32> [[TMP0]], [[RDX_SHUF]]
  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[BIN_RDX]], <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; STORE-NEXT:    [[BIN_RDX2:%.*]] = add <16 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
+; STORE-NEXT:    [[BIN_RDX2:%.*]] = add nsw <16 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
  ; STORE-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <16 x i32> [[BIN_RDX2]], <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; STORE-NEXT:    [[BIN_RDX4:%.*]] = add <16 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
+; STORE-NEXT:    [[BIN_RDX4:%.*]] = add nsw <16 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
  ; STORE-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <16 x i32> [[BIN_RDX4]], <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; STORE-NEXT:    [[BIN_RDX6:%.*]] = add <16 x i32> [[BIN_RDX4]], [[RDX_SHUF5]]
+; STORE-NEXT:    [[BIN_RDX6:%.*]] = add nsw <16 x i32> [[BIN_RDX4]], [[RDX_SHUF5]]
  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <16 x i32> [[BIN_RDX6]], i32 0
  ; STORE:         store i32 [[TMP1]], i32* %res, align 16
  ; STORE-NEXT:    ret void
@@ -731,15 +731,15 @@ define void @i32_red_example32(i32* %res) {
  ; STORE-LABEL: @i32_red_example32(
  ; STORE:         [[TMP0:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr_i32 to <32 x i32>*), align 16
  ; STORE:         [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; STORE-NEXT:    [[BIN_RDX:%.*]] = add <32 x i32> [[TMP0]], [[RDX_SHUF]]
+; STORE-NEXT:    [[BIN_RDX:%.*]] = add nsw <32 x i32> [[TMP0]], [[RDX_SHUF]]
  ; STORE-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <32 x i32> [[BIN_RDX]], <32 x i32> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; STORE-NEXT:    [[BIN_RDX2:%.*]] = add <32 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
+; STORE-NEXT:    [[BIN_RDX2:%.*]] = add nsw <32 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
  ; STORE-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <32 x i32> [[BIN_RDX2]], <32 x i32> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; STORE-NEXT:    [[BIN_RDX4:%.*]] = add <32 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
+; STORE-NEXT:    [[BIN_RDX4:%.*]] = add nsw <32 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
  ; STORE-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <32 x i32> [[BIN_RDX4]], <32 x i32> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; STORE-NEXT:    [[BIN_RDX6:%.*]] = add <32 x i32> [[BIN_RDX4]], [[RDX_SHUF5]]
+; STORE-NEXT:    [[BIN_RDX6:%.*]] = add nsw <32 x i32> [[BIN_RDX4]], [[RDX_SHUF5]]
  ; STORE-NEXT:    [[RDX_SHUF7:%.*]] = shufflevector <32 x i32> [[BIN_RDX6]], <32 x i32> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; STORE-NEXT:    [[BIN_RDX8:%.*]] = add <32 x i32> [[BIN_RDX6]], [[RDX_SHUF7]]
+; STORE-NEXT:    [[BIN_RDX8:%.*]] = add nsw <32 x i32> [[BIN_RDX6]], [[RDX_SHUF7]]
  ; STORE-NEXT:    [[TMP1:%.*]] = extractelement <32 x i32> [[BIN_RDX8]], i32 0
  ; STORE:         store i32 [[TMP1]], i32* %res, align 16
  ; STORE-NEXT:    ret void
diff --git a/test/Transforms/SLPVectorizer/X86/scheduling.ll b/test/Transforms/SLPVectorizer/X86/scheduling.ll

index 8395401c5df0b44a5c33b705e04d1c7ece9669d9..c4f521c8963efb00dbb333c596497fa5e11152d7 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/scheduling.ll
+++ b/test/Transforms/SLPVectorizer/X86/scheduling.ll
@@ -8,9 +8,9 @@ define i32 @foo(i32* nocapture readonly %diff) #0 {
  ; CHECK: [[S1:%.+]] = add nsw <4 x i32>
  ; CHECK: store <4 x i32> [[S1]],
  ; CHECK:         [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[S1]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
-; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[S1]], [[RDX_SHUF]]
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = add nsw <4 x i32> [[S1]], [[RDX_SHUF]]
  ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
+; CHECK-NEXT:    [[BIN_RDX2:%.*]] = add nsw <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
  ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
  ; CHECK:         [[ADD52:%.*]] = add nsw i32 [[TMP15]],
  ; CHECK:          ret i32 [[ADD52]]
author	Alexey Bataev <a.bataev@hotmail.com>
	Wed, 1 Mar 2017 12:43:39 +0000 (12:43 +0000)
committer	Alexey Bataev <a.bataev@hotmail.com>
	Wed, 1 Mar 2017 12:43:39 +0000 (12:43 +0000)
lib/Transforms/Vectorize/SLPVectorizer.cpp		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/horizontal.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/scheduling.ll		patch \| blob \| history