/// \returns true if the memory operations A and B are consecutive.
bool isConsecutiveAccess(Value *A, Value *B);
+ /// For consecutive loads (+(+ v0, v1)(+ v2, v3)), Left had v0 and v2
+ /// while Right had v1 and v3, which prevented bundling them into
+ /// a vector of loads. Reorder them so that Left now has v0 and v1
+ /// while Right has v2 and v3, enabling their bundling into a vector.
+ void reorderIfConsecutiveLoads(SmallVectorImpl<Value *> &Left,
+                                SmallVectorImpl<Value *> &Right);
+
/// \brief Perform LICM and CSE on the newly generated gather sequences.
void optimizeGatherSequence();
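The reordering described above can be seen on the test case at the end of this patch: for (a[0] + a[1]) + (a[2] + a[3]), operand splitting puts the loads of a[0] and a[2] in Left and those of a[1] and a[3] in Right, so neither side is a consecutive run. Below is a minimal standalone sketch of the swap (not LLVM code); it models each load by the array index it reads and uses a toy consecutive() predicate in place of isConsecutiveAccess.

#include <cstdio>
#include <utility>
#include <vector>

// A "load" is modeled by the array index it reads; B follows A when it
// reads the next element.
static bool consecutive(int A, int B) { return B == A + 1; }

// Toy version of reorderIfConsecutiveLoads: when Left[i], Right[i] and
// Left[i+1] form a consecutive run, swap Left[i+1] with Right[i] so that
// each side ends up holding a consecutive pair.
static void reorderIfConsecutiveLoads(std::vector<int> &Left,
                                      std::vector<int> &Right) {
  for (size_t i = 0; i + 1 < Left.size(); ++i)
    if (consecutive(Left[i], Right[i]) && consecutive(Right[i], Left[i + 1]))
      std::swap(Left[i + 1], Right[i]);
}

int main() {
  std::vector<int> Left = {0, 2}, Right = {1, 3}; // lanes of (a0+a1)+(a2+a3)
  reorderIfConsecutiveLoads(Left, Right);
  // Prints: Left = {0, 1}  Right = {2, 3}
  std::printf("Left = {%d, %d}  Right = {%d, %d}\n", Left[0], Left[1],
              Right[0], Right[1]);
  return 0;
}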
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
ValueList Left, Right;
reorderInputsAccordingToOpcode(VL, Left, Right);
+ reorderIfConsecutiveLoads(Left, Right);
buildTree_rec(Left, Depth + 1);
buildTree_rec(Right, Depth + 1);
return;
return X == PtrSCEVB;
}
+void BoUpSLP::reorderIfConsecutiveLoads(SmallVectorImpl<Value *> &Left,
+                                        SmallVectorImpl<Value *> &Right) {
+  // Walk adjacent lanes; when Left[i], Right[i] and Left[i+1] are
+  // consecutive loads, swap Left[i+1] with Right[i] so that each side
+  // holds a consecutive pair.
+  for (unsigned i = 0, e = Left.size(); i + 1 < e; ++i) {
+    if (!isa<LoadInst>(Left[i]) || !isa<LoadInst>(Right[i]))
+      return;
+    if (isConsecutiveAccess(Left[i], Right[i]) &&
+        isConsecutiveAccess(Right[i], Left[i + 1]))
+      std::swap(Left[i + 1], Right[i]);
+  }
+}
+
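Tracing the hadd test through this loop: at i = 0, Left[0] loads a[0], Right[0] loads a[1], and Left[1] loads a[2], so both isConsecutiveAccess checks succeed and the swap leaves Left = {a[0], a[1]} and Right = {a[2], a[3]}, making each operand bundle a candidate for a single <2 x float> load. The early return as soon as a lane is not a load keeps the pass cheap: the swap only pays off when a whole bundle can become a vector load. In the toy model above, the same loop also handles longer chains, e.g. an eight-load case with Left = {0, 2, 4, 6}, Right = {1, 3, 5, 7} becomes Left = {0, 1, 4, 5}, Right = {2, 3, 6, 7}, consecutive pairs on both sides.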
void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) {
Instruction *VL0 = cast<Instruction>(VL[0]);
BasicBlock::iterator NextInst = VL0;
case Instruction::Or:
case Instruction::Xor: {
ValueList LHSVL, RHSVL;
- if (isa<BinaryOperator>(VL0) && VL0->isCommutative())
+ if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
reorderInputsAccordingToOpcode(E->Scalars, LHSVL, RHSVL);
- else
+ reorderIfConsecutiveLoads(LHSVL, RHSVL);
+ } else
for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
--- /dev/null
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=aarch64-unknown-linux-gnu -mcpu=cortex-a57 | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; float hadd (float *a) {
+; return (a[0] + a[1]) + (a[2] + a[3]);
+; }
+
+; CHECK-LABEL: @hadd
+; CHECK: load <2 x float>*
+; CHECK: fadd <2 x float>
+; CHECK: extractelement <2 x float>
+
+define float @hadd(float* nocapture readonly %a) {
+entry:
+ %0 = load float* %a, align 4
+ %arrayidx1 = getelementptr inbounds float* %a, i64 1
+ %1 = load float* %arrayidx1, align 4
+ %add = fadd float %0, %1
+ %arrayidx2 = getelementptr inbounds float* %a, i64 2
+ %2 = load float* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds float* %a, i64 3
+ %3 = load float* %arrayidx3, align 4
+ %add4 = fadd float %2, %3
+ %add5 = fadd float %add, %add4
+ ret float %add5
+}