virtual unsigned getAddressComputationCost(Type *Ty,
bool IsComplex = false) const;
+ /// \returns The cost, if any, of keeping values of the given types alive
+ /// over a callsite.
+ ///
+  /// Some types may require the use of register classes that do not have
+  /// any callee-saved registers, and so would require a spill and fill.
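+  /// For example, on AArch64 a 128-bit vector value that is live across a
+  /// call must typically be spilled and refilled, since no vector register
+  /// is callee-saved in full.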
+ virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const;
+
/// @}
/// Analysis group identification.
return PrevTTI->getReductionCost(Opcode, Ty, IsPairwise);
}
+unsigned TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys)
+ const {
+ return PrevTTI->getCostOfKeepingLiveOverCall(Tys);
+}
+
namespace {
struct NoTTI final : ImmutablePass, TargetTransformInfo {
unsigned getReductionCost(unsigned, Type *, bool) const override {
return 1;
}
+
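+  // Without target information, assume that keeping values live over a call
+  // is free.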
+ unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const override {
+ return 0;
+ }
+
};
} // end anonymous namespace
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace) const override;
+
+ unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const override;
+
/// @}
};
return LT.first;
}
+
+unsigned AArch64TTI::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const {
+ unsigned Cost = 0;
+ for (auto *I : Tys) {
+ if (!I->isVectorTy())
+ continue;
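+    // A 128-bit vector occupies a full Q register, and under AAPCS64 only
+    // the low 64 bits of v8-v15 are callee-saved, so keeping such a value
+    // live over a call requires a spill and a reload; model that as one
+    // store plus one load of the vector type.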
+ if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
+ Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
+ getMemoryOpCost(Instruction::Load, I, 128, 0);
+ }
+ return Cost;
+}
/// Returns the vectorized root.
Value *vectorizeTree();
+  /// \returns the cost incurred by unwanted spills and fills caused by
+  /// holding live values over call sites.
+ int getSpillCost();
+
/// \returns the vectorization cost of the subtree that starts at \p VL.
/// A negative number means that this is profitable.
int getTreeCost();
return true;
}
+int BoUpSLP::getSpillCost() {
+ // Walk from the bottom of the tree to the top, tracking which values are
+ // live. When we see a call instruction that is not part of our tree,
+ // query TTI to see if there is a cost to keeping values live over it
+ // (for example, if spills and fills are required).
+ unsigned BundleWidth = VectorizableTree.front().Scalars.size();
+ int Cost = 0;
+
+ SmallPtrSet<Instruction*, 4> LiveValues;
+ Instruction *PrevInst = nullptr;
+
+ for (unsigned N = 0; N < VectorizableTree.size(); ++N) {
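+    // The first scalar of each bundle acts as the representative
+    // instruction for the whole bundle.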
+ Instruction *Inst = dyn_cast<Instruction>(VectorizableTree[N].Scalars[0]);
+ if (!Inst)
+ continue;
+
+ if (!PrevInst) {
+ PrevInst = Inst;
+ continue;
+ }
+
+ DEBUG(
+ dbgs() << "SLP: #LV: " << LiveValues.size();
+ for (auto *X : LiveValues)
+ dbgs() << " " << X->getName();
+ dbgs() << ", Looking at ";
+ Inst->dump();
+ );
+
+ // Update LiveValues.
+ LiveValues.erase(PrevInst);
+ for (auto &J : PrevInst->operands()) {
+ if (isa<Instruction>(&*J) && ScalarToTreeEntry.count(&*J))
+ LiveValues.insert(cast<Instruction>(&*J));
+ }
+
+ // Now find the sequence of instructions between PrevInst and Inst.
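+    // We scan backwards from PrevInst towards Inst, charging the spill cost
+    // for every call we cross while the tracked values are live. If Inst is
+    // in a different basic block, wrap to the bottom of Inst's block once we
+    // run off the top of PrevInst's block.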
+ BasicBlock::reverse_iterator InstIt(Inst), PrevInstIt(PrevInst);
+ --PrevInstIt;
+ while (InstIt != PrevInstIt) {
+ if (PrevInstIt == PrevInst->getParent()->rend()) {
+ PrevInstIt = Inst->getParent()->rbegin();
+ continue;
+ }
+
+ if (isa<CallInst>(&*PrevInstIt) && &*PrevInstIt != PrevInst) {
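+        // After vectorization each live scalar becomes one lane of a
+        // BundleWidth-wide vector, so query the cost of keeping the
+        // corresponding vector types alive over this call.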
+ SmallVector<Type*, 4> V;
+ for (auto *II : LiveValues)
+ V.push_back(VectorType::get(II->getType(), BundleWidth));
+ Cost += TTI->getCostOfKeepingLiveOverCall(V);
+ }
+
+ ++PrevInstIt;
+ }
+
+ PrevInst = Inst;
+ }
+
+ DEBUG(dbgs() << "SLP: SpillCost=" << Cost << "\n");
+ return Cost;
+}
+
int BoUpSLP::getTreeCost() {
int Cost = 0;
DEBUG(dbgs() << "SLP: Calculating cost for tree of size " <<
I->Lane);
}
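+  // Account for spills and fills of vector values that are live across
+  // calls in the vectorized code.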
+ Cost += getSpillCost();
+
DEBUG(dbgs() << "SLP: Total Cost " << Cost + ExtractCost<< ".\n");
return Cost + ExtractCost;
}
--- /dev/null
+; RUN: opt -S -basicaa -slp-vectorizer < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios5.0.0"
+
+; Holding a value live over a call boundary may require
+; spills and fills. This is the case for <2 x double>,
+; as it occupies a Q register and no Q register is
+; callee-saved in full.
+
+; CHECK: load double
+; CHECK: load double
+; CHECK: call void @g
+; CHECK: store double
+; CHECK: store double
+define void @f(double* %p, double* %q) {
+ %addr2 = getelementptr double* %q, i32 1
+ %addr = getelementptr double* %p, i32 1
+ %x = load double* %p
+ %y = load double* %addr
+ call void @g()
+ store double %x, double* %q
+ store double %y, double* %addr2
+ ret void
+}
+declare void @g()
+
+; Check we deal with loops correctly.
+;
+; CHECK: store <2 x double>
+; CHECK: load <2 x double>
+define void @f2(double* %p, double* %q) {
+entry:
+ br label %loop
+
+loop:
+ %p1 = phi double [0.0, %entry], [%x, %loop]
+ %p2 = phi double [0.0, %entry], [%y, %loop]
+ %addr2 = getelementptr double* %q, i32 1
+ %addr = getelementptr double* %p, i32 1
+ store double %p1, double* %q
+ store double %p2, double* %addr2
+
+ %x = load double* %p
+ %y = load double* %addr
+ br label %loop
+}