[LV] Fix PR34711 - widen instruction ranges when sinking casts

author Ayal Zaks <ayal.zaks@intel.com>

Thu, 5 Oct 2017 12:41:49 +0000 (12:41 +0000)

committer Ayal Zaks <ayal.zaks@intel.com>

Thu, 5 Oct 2017 12:41:49 +0000 (12:41 +0000)
author Ayal Zaks <ayal.zaks@intel.com>
Thu, 5 Oct 2017 12:41:49 +0000 (12:41 +0000)
committer Ayal Zaks <ayal.zaks@intel.com>
Thu, 5 Oct 2017 12:41:49 +0000 (12:41 +0000)
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp

index 6f53b08ef4c5a344838cbf360f3d3c992c410dc6..778fed036142435a72b05339016bbdfa4f509189 100644 (file)
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2215,14 +2215,13 @@ private:
    VPWidenIntOrFpInductionRecipe *tryToOptimizeInduction(Instruction *I,
                                                          VFRange &Range);
  
-  /// Check if \I can be widened within the given VF \p Range. If \I can be
-  /// widened for Range.Start, extend \p LastWidenRecipe to include \p I if
-  /// possible or else build a new VPWidenRecipe for it, and return the
-  /// VPWidenRecipe that includes \p I. If \p I cannot be widened for
-  /// Range.Start \return null. Range.End may be decreased to ensure same
-  /// decision from \p Range.Start to \p Range.End.
-  VPWidenRecipe *tryToWiden(Instruction *I, VPWidenRecipe *LastWidenRecipe,
-                            VFRange &Range);
+  /// Check if \p I can be widened within the given VF \p Range. If \p I can be
+  /// widened for \p Range.Start, extend the last recipe of \p VPBB to include
+  /// \p I if possible, or else build a new VPWidenRecipe for it and append it
+  /// to \p VPBB. \return true if \p I can be widened for \p Range.Start, false
+  /// otherwise. \p Range.End may be decreased to ensure the same decision from
+  /// \p Range.Start to \p Range.End.
+  bool tryToWiden(Instruction *I, VPBasicBlock *VPBB, VFRange &Range);
  
    /// Build a VPReplicationRecipe for \p I and enclose it within a Region if it
    /// is predicated. \return \p VPBB augmented with this new recipe if \p I is
@@ -7988,11 +7987,11 @@ LoopVectorizationPlanner::tryToOptimizeInduction(Instruction *I,
    return nullptr;
  }
  
-VPWidenRecipe *LoopVectorizationPlanner::tryToWiden(
-    Instruction *I, VPWidenRecipe *LastWidenRecipe, VFRange &Range) {
+bool LoopVectorizationPlanner::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
+                                          VFRange &Range) {
  
    if (Legal->isScalarWithPredication(I))
-    return nullptr;
+    return false;
  
    auto IsVectorizableOpcode = [](unsigned Opcode) {
      switch (Opcode) {
@@ -8041,13 +8040,13 @@ VPWidenRecipe *LoopVectorizationPlanner::tryToWiden(
    };
  
    if (!IsVectorizableOpcode(I->getOpcode()))
-    return nullptr;
+    return false;
  
    if (CallInst *CI = dyn_cast<CallInst>(I)) {
      Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
      if (ID && (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end ||
                 ID == Intrinsic::lifetime_start))
-      return nullptr;
+      return false;
    }
  
    auto willWiden = [&](unsigned VF) -> bool {
@@ -8079,13 +8078,18 @@ VPWidenRecipe *LoopVectorizationPlanner::tryToWiden(
    };
  
    if (!getDecisionAndClampRange(willWiden, Range))
-    return nullptr;
+    return false;
  
    // Success: widen this instruction. We optimize the common case where
    // consecutive instructions can be represented by a single recipe.
-  if (LastWidenRecipe && LastWidenRecipe->appendInstruction(I))
-    return LastWidenRecipe;
-  return new VPWidenRecipe(I);
+  if (!VPBB->empty()) {
+    VPWidenRecipe *LastWidenRecipe = dyn_cast<VPWidenRecipe>(&VPBB->back());
+    if (LastWidenRecipe && LastWidenRecipe->appendInstruction(I))
+      return true;
+  }
+
+  VPBB->appendRecipe(new VPWidenRecipe(I));
+  return true;
  }
  
  VPBasicBlock *LoopVectorizationPlanner::handleReplication(
@@ -8182,7 +8186,6 @@ VPlan *LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
      auto *FirstVPBBForBB = new VPBasicBlock(BB->getName());
      VPBB->setOneSuccessor(FirstVPBBForBB);
      VPBB = FirstVPBBForBB;
-    VPWidenRecipe *LastWidenRecipe = nullptr;
  
      std::vector<Instruction *> Ingredients;
  
@@ -8250,12 +8253,8 @@ VPlan *LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
        // Check if Instr is to be widened by a general VPWidenRecipe, after
        // having first checked for specific widening recipes that deal with
        // Interleave Groups, Inductions and Phi nodes.
-      if ((Recipe = tryToWiden(Instr, LastWidenRecipe, Range))) {
-        if (Recipe != LastWidenRecipe)
-          VPBB->appendRecipe(Recipe);
-        LastWidenRecipe = cast<VPWidenRecipe>(Recipe);
+      if (tryToWiden(Instr, VPBB, Range))
          continue;
-      }
  
        // Otherwise, if all widening options failed, Instruction is to be
        // replicated. This may create a successor for VPBB.
diff --git a/test/Transforms/LoopVectorize/first-order-recurrence.ll b/test/Transforms/LoopVectorize/first-order-recurrence.ll

index bc9247f80717eb156b74b452e0b511a5be30a11e..998f412674bd3b073354e73badee8ec2b2cda39d 100644 (file)
--- a/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -491,6 +491,55 @@ for.end:
    ret void
  }
  
+; PR34711: consider three consecutive instructions such that the first will be
+; widened, the second is a cast that will be widened and needs to sink after the
+; third, and the third is a first-order-recurring load that will be replicated
+; instead of widened. Although the cast and the first instruction will both be
+; widened, and are originally adjacent to each other, make sure the replicated
+; load ends up appearing between them.
+;
+; void PR34711(short[2] *a, int *b, int *c, int n) {
+;   for(int i = 0; i < n; i++) {
+;     c[i] = 7;
+;     b[i] = (a[i][0] * a[i][1]);
+;   }
+; }
+;
+; SINK-AFTER-LABEL: @PR34711
+; Check that the sext sank after the load in the vector loop.
+; SINK-AFTER: vector.body
+; SINK-AFTER:   %vector.recur = phi <4 x i16> [ %vector.recur.init, %vector.ph ], [ {{.*}}, %vector.body ]
+; SINK-AFTER:   %[[VSHUF:.+]] = shufflevector <4 x i16> %vector.recur, <4 x i16> %{{.*}}, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; SINK-AFTER:   %[[VCONV:.+]] = sext <4 x i16> %[[VSHUF]] to <4 x i32>
+; SINK-AFTER:   %[[VCONV3:.+]] = sext <4 x i16> {{.*}} to <4 x i32>
+; SINK-AFTER:   mul nsw <4 x i32> %[[VCONV3]], %[[VCONV]]
+;
+define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) {
+entry:
+  %pre.index = getelementptr inbounds [2 x i16], [2 x i16]* %a, i64 0, i64 0
+  %.pre = load i16, i16* %pre.index
+  br label %for.body
+
+for.body:
+  %0 = phi i16 [ %.pre, %entry ], [ %1, %for.body ]
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arraycidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
+  %cur.index = getelementptr inbounds [2 x i16], [2 x i16]* %a, i64 %indvars.iv, i64 1
+  store i32 7, i32* %arraycidx   ; 1st instruction, to be widened.
+  %conv = sext i16 %0 to i32     ; 2nd, cast to sink after third.
+  %1 = load i16, i16* %cur.index ; 3rd, first-order-recurring load not widened.
+  %conv3 = sext i16 %1 to i32
+  %mul = mul nsw i32 %conv3, %conv
+  %arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+  store i32 %mul, i32* %arrayidx5
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
  ; void no_sink_after(short *a, int n, int *b) {
  ;   for(int i = 0; i < n; i++)
  ;     b[i] = ((a[i] + 2) * a[i + 1]);
author	Ayal Zaks <ayal.zaks@intel.com>
	Thu, 5 Oct 2017 12:41:49 +0000 (12:41 +0000)
committer	Ayal Zaks <ayal.zaks@intel.com>
	Thu, 5 Oct 2017 12:41:49 +0000 (12:41 +0000)
lib/Transforms/Vectorize/LoopVectorize.cpp		patch \| blob \| history
test/Transforms/LoopVectorize/first-order-recurrence.ll		patch \| blob \| history