[SLPVectorizer] Merge reorderAltShuffleOperands into reorderInputsAccordingToOpcode

author Simon Pilgrim <llvm-dev@redking.me.uk>

Mon, 25 Mar 2019 20:05:27 +0000 (20:05 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Mon, 25 Mar 2019 20:05:27 +0000 (20:05 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Mon, 25 Mar 2019 20:05:27 +0000 (20:05 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Mon, 25 Mar 2019 20:05:27 +0000 (20:05 +0000)
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp

index 983b8fa4af1f84231897e2e2a1fbd0fe104d9dcd..9176b08f3895a9b631b04e7708f0698f88f5613e 100644 (file)
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -674,13 +674,6 @@ private:
    /// be beneficial even the tree height is tiny.
    bool isFullyVectorizableTinyTree();
  
-  /// \reorder commutative operands in alt shuffle if they result in
-  ///  vectorized code.
-  void reorderAltShuffleOperands(const InstructionsState &S,
-                                 ArrayRef<Value *> VL,
-                                 SmallVectorImpl<Value *> &Left,
-                                 SmallVectorImpl<Value *> &Right);
-
    /// \reorder commutative operands to get better probability of
    /// generating vectorized code.
    void reorderInputsAccordingToOpcode(const InstructionsState &S,
@@ -2072,7 +2065,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
        // Reorder operands if reordering would enable vectorization.
        if (isa<BinaryOperator>(VL0)) {
          ValueList Left, Right;
-        reorderAltShuffleOperands(S, VL, Left, Right);
+        reorderInputsAccordingToOpcode(S, VL, Left, Right);
          UserTreeIdx.EdgeIdx = 0;
          buildTree_rec(Left, Depth + 1, UserTreeIdx);
          UserTreeIdx.EdgeIdx = 1;
@@ -2787,63 +2780,6 @@ int BoUpSLP::getGatherCost(ArrayRef<Value *> VL) {
    return getGatherCost(VecTy, ShuffledElements);
  }
  
-// Reorder commutative operations in alternate shuffle if the resulting vectors
-// are consecutive loads. This would allow us to vectorize the tree.
-// If we have something like-
-// load a[0] - load b[0]
-// load b[1] + load a[1]
-// load a[2] - load b[2]
-// load a[3] + load b[3]
-// Reordering the second load b[1]  load a[1] would allow us to vectorize this
-// code.
-void BoUpSLP::reorderAltShuffleOperands(const InstructionsState &S,
-                                        ArrayRef<Value *> VL,
-                                        SmallVectorImpl<Value *> &Left,
-                                        SmallVectorImpl<Value *> &Right) {
-  // Push left and right operands of binary operation into Left and Right
-  for (Value *V : VL) {
-    auto *I = cast<Instruction>(V);
-    assert(S.isOpcodeOrAlt(I) && "Incorrect instruction in vector");
-    Left.push_back(I->getOperand(0));
-    Right.push_back(I->getOperand(1));
-  }
-
-  // Reorder if we have a commutative operation and consecutive access
-  // are on either side of the alternate instructions.
-  for (unsigned j = 0, e = VL.size() - 1; j < e; ++j) {
-    if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
-      if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
-        Instruction *VL1 = cast<Instruction>(VL[j]);
-        Instruction *VL2 = cast<Instruction>(VL[j + 1]);
-        if (VL1->isCommutative() && isConsecutiveAccess(L, L1, *DL, *SE)) {
-          std::swap(Left[j], Right[j]);
-          continue;
-        } else if (VL2->isCommutative() &&
-                   isConsecutiveAccess(L, L1, *DL, *SE)) {
-          std::swap(Left[j + 1], Right[j + 1]);
-          continue;
-        }
-        // else unchanged
-      }
-    }
-    if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
-      if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
-        Instruction *VL1 = cast<Instruction>(VL[j]);
-        Instruction *VL2 = cast<Instruction>(VL[j + 1]);
-        if (VL1->isCommutative() && isConsecutiveAccess(L, L1, *DL, *SE)) {
-          std::swap(Left[j], Right[j]);
-          continue;
-        } else if (VL2->isCommutative() &&
-                   isConsecutiveAccess(L, L1, *DL, *SE)) {
-          std::swap(Left[j + 1], Right[j + 1]);
-          continue;
-        }
-        // else unchanged
-      }
-    }
-  }
-}
-
  // Return true if the i'th left and right operands can be commuted.
  //
  // The vectorizer is trying to either have all elements one side being
@@ -2918,10 +2854,13 @@ void BoUpSLP::reorderInputsAccordingToOpcode(const InstructionsState &S,
                                               ArrayRef<Value *> VL,
                                               SmallVectorImpl<Value *> &Left,
                                               SmallVectorImpl<Value *> &Right) {
-  if (!VL.empty()) {
-    // Peel the first iteration out of the loop since there's nothing
-    // interesting to do anyway and it simplifies the checks in the loop.
-    auto *I = cast<Instruction>(VL[0]);
+  assert(!VL.empty() && Left.empty() && Right.empty() &&
+         "Unexpected instruction/operand lists");
+
+  // Push left and right operands of binary operation into Left and Right
+  for (Value *V : VL) {
+    auto *I = cast<Instruction>(V);
+    assert(S.isOpcodeOrAlt(I) && "Incorrect instruction in vector");
      Left.push_back(I->getOperand(0));
      Right.push_back(I->getOperand(1));
    }
@@ -2935,15 +2874,10 @@ void BoUpSLP::reorderInputsAccordingToOpcode(const InstructionsState &S,
  
    for (unsigned i = 1, e = VL.size(); i != e; ++i) {
      Instruction *I = cast<Instruction>(VL[i]);
-    assert(((I->getOpcode() == S.getOpcode() && I->isCommutative()) ||
-            (I->getOpcode() != S.getOpcode() &&
-             Instruction::isCommutative(S.getOpcode()))) &&
-           "Can only process commutative instruction");
      // Commute to favor either a splat or maximizing having the same opcodes on
      // one side.
-    Left.push_back(I->getOperand(0));
-    Right.push_back(I->getOperand(1));
-    if (shouldReorderOperands(i, Left, Right, AllSameOpcodeLeft,
+    if (I->isCommutative() &&
+        shouldReorderOperands(i, Left, Right, AllSameOpcodeLeft,
                                AllSameOpcodeRight, SplatLeft, SplatRight))
        std::swap(Left[i], Right[i]);
  
@@ -2965,11 +2899,11 @@ void BoUpSLP::reorderInputsAccordingToOpcode(const InstructionsState &S,
    // Finally check if we can get longer vectorizable chain by reordering
    // without breaking the good operand order detected above.
    // E.g. If we have something like-
-  // load a[0]  load b[0]
-  // load b[1]  load a[1]
-  // load a[2]  load b[2]
-  // load a[3]  load b[3]
-  // Reordering the second load b[1]  load a[1] would allow us to vectorize
+  // load a[0] - load b[0]
+  // load b[1] + load a[1]
+  // load a[2] - load b[2]
+  // load a[3] + load b[3]
+  // Reordering the second load b[1] + load a[1] would allow us to vectorize
    // this code and we still retain AllSameOpcode property.
    // FIXME: This load reordering might break AllSameOpcode in some rare cases
    // such as-
@@ -2981,16 +2915,32 @@ void BoUpSLP::reorderInputsAccordingToOpcode(const InstructionsState &S,
      if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
        if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
          if (isConsecutiveAccess(L, L1, *DL, *SE)) {
-          std::swap(Left[j + 1], Right[j + 1]);
-          continue;
+          auto *VL1 = cast<Instruction>(VL[j]);
+          auto *VL2 = cast<Instruction>(VL[j + 1]);
+          if (VL2->isCommutative()) {
+            std::swap(Left[j + 1], Right[j + 1]);
+            continue;
+          }
+          if (VL1->isCommutative()) {
+            std::swap(Left[j], Right[j]);
+            continue;
+          }
          }
        }
      }
      if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
        if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
          if (isConsecutiveAccess(L, L1, *DL, *SE)) {
-          std::swap(Left[j + 1], Right[j + 1]);
-          continue;
+          auto *VL1 = cast<Instruction>(VL[j]);
+          auto *VL2 = cast<Instruction>(VL[j + 1]);
+          if (VL2->isCommutative()) {
+            std::swap(Left[j + 1], Right[j + 1]);
+            continue;
+          }
+          if (VL1->isCommutative()) {
+            std::swap(Left[j], Right[j]);
+            continue;
+          }
          }
        }
      }
diff --git a/test/Transforms/SLPVectorizer/X86/PR39774.ll b/test/Transforms/SLPVectorizer/X86/PR39774.ll

index ae4a6b88bd31e1497094574c1328e39e19b0d349..24f75b32c5d2d0aa6975c957550b2c6bf26bd00e 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/PR39774.ll
+++ b/test/Transforms/SLPVectorizer/X86/PR39774.ll
@@ -81,9 +81,9 @@ define void @Test(i32) {
  ; CHECK-NEXT:    [[OP_EXTRA30:%.*]] = and i32 [[OP_EXTRA29]], [[TMP0]]
  ; CHECK-NEXT:    [[VAL_42:%.*]] = and i32 [[VAL_40]], undef
  ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> undef, i32 [[OP_EXTRA30]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP2]], i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 14910, i32 1
  ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 14910, i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP2]], i32 1
  ; CHECK-NEXT:    [[TMP9:%.*]] = and <2 x i32> [[TMP6]], [[TMP8]]
  ; CHECK-NEXT:    [[TMP10:%.*]] = add <2 x i32> [[TMP6]], [[TMP8]]
  ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> <i32 0, i32 3>
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Mon, 25 Mar 2019 20:05:27 +0000 (20:05 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Mon, 25 Mar 2019 20:05:27 +0000 (20:05 +0000)
lib/Transforms/Vectorize/SLPVectorizer.cpp		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/PR39774.ll		patch \| blob \| history