[NFC][ARM][ParallelDSP] Cleanup of BinOpChain

author Sam Parker <sam.parker@arm.com>

Mon, 29 Jul 2019 08:41:51 +0000 (08:41 +0000)

committer Sam Parker <sam.parker@arm.com>

Mon, 29 Jul 2019 08:41:51 +0000 (08:41 +0000)
author Sam Parker <sam.parker@arm.com>
Mon, 29 Jul 2019 08:41:51 +0000 (08:41 +0000)
committer Sam Parker <sam.parker@arm.com>
Mon, 29 Jul 2019 08:41:51 +0000 (08:41 +0000)
diff --git a/lib/Target/ARM/ARMParallelDSP.cpp b/lib/Target/ARM/ARMParallelDSP.cpp

index 5b1707ba7f1469bb1afcc0a646decce17e670801..8434111d807313d14b6b575e4c6b153055668a28 100644 (file)
--- a/lib/Target/ARM/ARMParallelDSP.cpp
+++ b/lib/Target/ARM/ARMParallelDSP.cpp
@@ -47,38 +47,32 @@ DisableParallelDSP("disable-arm-parallel-dsp", cl::Hidden, cl::init(false),
  
  namespace {
    struct OpChain;
-  struct BinOpChain;
+  struct MulCandidate;
    class Reduction;
  
-  using OpChainList     = SmallVector<std::unique_ptr<BinOpChain>, 8>;
+  using MulCandList     = SmallVector<std::unique_ptr<MulCandidate>, 8>;
    using ReductionList   = SmallVector<Reduction, 8>;
    using ValueList       = SmallVector<Value*, 8>;
    using MemInstList     = SmallVector<LoadInst*, 8>;
-  using PMACPair        = std::pair<BinOpChain*,BinOpChain*>;
+  using PMACPair        = std::pair<MulCandidate*,MulCandidate*>;
    using PMACPairList    = SmallVector<PMACPair, 8>;
-  using Instructions    = SmallVector<Instruction*,16>;
-  using MemLocList      = SmallVector<MemoryLocation, 4>;
  
-  // 'BinOpChain' holds the multiplication instructions that are candidates
+  // 'MulCandidate' holds the multiplication instructions that are candidates
    // for parallel execution.
-  struct BinOpChain {
+  struct MulCandidate {
      Instruction   *Root;
-    ValueList     AllValues;
-    MemInstList   VecLd;    // List of all load instructions.
-    ValueList     LHS;      // List of all (narrow) left hand operands.
-    ValueList     RHS;      // List of all (narrow) right hand operands.
+    MemInstList   VecLd;    // Container for loads to widen.
+    Value*        LHS;
+    Value*        RHS;
      bool          Exchange = false;
      bool          ReadOnly = true;
  
-    BinOpChain(Instruction *I, ValueList &lhs, ValueList &rhs) :
-      Root(I), LHS(lhs), RHS(rhs) {
-        for (auto *V : LHS)
-          AllValues.push_back(V);
-        for (auto *V : RHS)
-          AllValues.push_back(V);
-    }
+    MulCandidate(Instruction *I, ValueList &lhs, ValueList &rhs) :
+      Root(I), LHS(lhs.front()), RHS(rhs.front()) { }
  
-    unsigned size() const { return AllValues.size(); }
+    bool HasTwoLoadInputs() const {
+      return isa<LoadInst>(LHS) && isa<LoadInst>(RHS);
+    }
    };
  
    /// Represent a sequence of multiply-accumulate operations with the aim to
@@ -86,7 +80,7 @@ namespace {
    class Reduction {
      Instruction     *Root = nullptr;
      Value           *Acc = nullptr;
-    OpChainList     Muls;
+    MulCandList     Muls;
      PMACPairList        MulPairs;
      SmallPtrSet<Instruction*, 4> Adds;
  
@@ -98,10 +92,10 @@ namespace {
      /// Record an Add instruction that is a part of the this reduction.
      void InsertAdd(Instruction *I) { Adds.insert(I); }
  
-    /// Record a BinOpChain, rooted at a Mul instruction, that is a part of
+    /// Record a MulCandidate, rooted at a Mul instruction, that is a part of
      /// this reduction.
      void InsertMul(Instruction *I, ValueList &LHS, ValueList &RHS) {
-      Muls.push_back(make_unique<BinOpChain>(I, LHS, RHS));
+      Muls.push_back(make_unique<MulCandidate>(I, LHS, RHS));
      }
  
      /// Add the incoming accumulator value, returns true if a value had not
@@ -114,9 +108,9 @@ namespace {
        return true;
      }
  
-    /// Set two BinOpChains, rooted at muls, that can be executed as a single
+    /// Set two MulCandidates, rooted at muls, that can be executed as a single
      /// parallel operation.
-    void AddMulPair(BinOpChain *Mul0, BinOpChain *Mul1) {
+    void AddMulPair(MulCandidate *Mul0, MulCandidate *Mul1) {
        MulPairs.push_back(std::make_pair(Mul0, Mul1));
      }
  
@@ -133,11 +127,11 @@ namespace {
      /// Return the set of adds that comprise the reduction.
      SmallPtrSetImpl<Instruction*> &getAdds() { return Adds; }
  
-    /// Return the BinOpChain, rooted at mul instruction, that comprise the
+    /// Return the MulCandidate, rooted at mul instruction, that comprise
      /// the reduction.
-    OpChainList &getMuls() { return Muls; }
+    MulCandList &getMuls() { return Muls; }
  
-    /// Return the BinOpChain, rooted at mul instructions, that have been
+    /// Return the MulCandidates, rooted at mul instructions, that have been
      /// paired for parallel execution.
      PMACPairList &getMulPairs() { return MulPairs; }
  
@@ -546,74 +540,57 @@ bool ARMParallelDSP::CreateParallelPairs(Reduction &R) {
      return false;
  
    // Check that the muls operate directly upon sign extended loads.
-  for (auto &MulChain : R.getMuls()) {
-    // A mul has 2 operands, and a narrow op consist of sext and a load; thus
-    // we expect at least 4 items in this operand value list.
-    if (MulChain->size() < 4) {
-      LLVM_DEBUG(dbgs() << "Operand list too short.\n");
+  for (auto &MulCand : R.getMuls()) {
+    if (!MulCand->HasTwoLoadInputs())
        return false;
-    }
-    ValueList &LHS = static_cast<BinOpChain*>(MulChain.get())->LHS;
-    ValueList &RHS = static_cast<BinOpChain*>(MulChain.get())->RHS;
-
-    // Use +=2 to skip over the expected extend instructions.
-    for (unsigned i = 0, e = LHS.size(); i < e; i += 2) {
-      if (!isa<LoadInst>(LHS[i]) || !isa<LoadInst>(RHS[i]))
-        return false;
-    }
    }
  
-  auto CanPair = [&](Reduction &R, BinOpChain *PMul0, BinOpChain *PMul1) {
+  auto CanPair = [&](Reduction &R, MulCandidate *PMul0, MulCandidate *PMul1) {
      // The first elements of each vector should be loads with sexts. If we
      // find that its two pairs of consecutive loads, then these can be
      // transformed into two wider loads and the users can be replaced with
      // DSP intrinsics.
-    for (unsigned x = 0; x < PMul0->LHS.size(); x += 2) {
-      auto *Ld0 = dyn_cast<LoadInst>(PMul0->LHS[x]);
-      auto *Ld1 = dyn_cast<LoadInst>(PMul1->LHS[x]);
-      auto *Ld2 = dyn_cast<LoadInst>(PMul0->RHS[x]);
-      auto *Ld3 = dyn_cast<LoadInst>(PMul1->RHS[x]);
-
-      if (!Ld0 || !Ld1 || !Ld2 || !Ld3)
-        return false;
-
-      LLVM_DEBUG(dbgs() << "Loads:\n"
-                 << " - " << *Ld0 << "\n"
-                 << " - " << *Ld1 << "\n"
-                 << " - " << *Ld2 << "\n"
-                 << " - " << *Ld3 << "\n");
-
-      if (AreSequentialLoads(Ld0, Ld1, PMul0->VecLd)) {
-        if (AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
-          LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
-          R.AddMulPair(PMul0, PMul1);
-          return true;
-        } else if (AreSequentialLoads(Ld3, Ld2, PMul1->VecLd)) {
-          LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
-          LLVM_DEBUG(dbgs() << "    exchanging Ld2 and Ld3\n");
-          PMul1->Exchange = true;
-          R.AddMulPair(PMul0, PMul1);
-          return true;
-        }
-      } else if (AreSequentialLoads(Ld1, Ld0, PMul0->VecLd) &&
-                 AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
+    auto Ld0 = static_cast<LoadInst*>(PMul0->LHS);
+    auto Ld1 = static_cast<LoadInst*>(PMul1->LHS);
+    auto Ld2 = static_cast<LoadInst*>(PMul0->RHS);
+    auto Ld3 = static_cast<LoadInst*>(PMul1->RHS);
+
+    LLVM_DEBUG(dbgs() << "Loads:\n"
+               << " - " << *Ld0 << "\n"
+               << " - " << *Ld1 << "\n"
+               << " - " << *Ld2 << "\n"
+               << " - " << *Ld3 << "\n");
+
+    if (AreSequentialLoads(Ld0, Ld1, PMul0->VecLd)) {
+      if (AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
+        LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
+        R.AddMulPair(PMul0, PMul1);
+        return true;
+      } else if (AreSequentialLoads(Ld3, Ld2, PMul1->VecLd)) {
          LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
-        LLVM_DEBUG(dbgs() << "    exchanging Ld0 and Ld1\n");
-        LLVM_DEBUG(dbgs() << "    and swapping muls\n");
-        PMul0->Exchange = true;
-        // Only the second operand can be exchanged, so swap the muls.
-        R.AddMulPair(PMul1, PMul0);
+        LLVM_DEBUG(dbgs() << "    exchanging Ld2 and Ld3\n");
+        PMul1->Exchange = true;
+        R.AddMulPair(PMul0, PMul1);
          return true;
        }
+    } else if (AreSequentialLoads(Ld1, Ld0, PMul0->VecLd) &&
+               AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
+      LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
+      LLVM_DEBUG(dbgs() << "    exchanging Ld0 and Ld1\n");
+      LLVM_DEBUG(dbgs() << "    and swapping muls\n");
+      PMul0->Exchange = true;
+      // Only the second operand can be exchanged, so swap the muls.
+      R.AddMulPair(PMul1, PMul0);
+      return true;
      }
      return false;
    };
  
-  OpChainList &Muls = R.getMuls();
+  MulCandList &Muls = R.getMuls();
    const unsigned Elems = Muls.size();
    SmallPtrSet<const Instruction*, 4> Paired;
    for (unsigned i = 0; i < Elems; ++i) {
-    BinOpChain *PMul0 = static_cast<BinOpChain*>(Muls[i].get());
+    MulCandidate *PMul0 = static_cast<MulCandidate*>(Muls[i].get());
      if (Paired.count(PMul0->Root))
        continue;
  
@@ -621,7 +598,7 @@ bool ARMParallelDSP::CreateParallelPairs(Reduction &R) {
        if (i == j)
          continue;
  
-      BinOpChain *PMul1 = static_cast<BinOpChain*>(Muls[j].get());
+      MulCandidate *PMul1 = static_cast<MulCandidate*>(Muls[j].get());
        if (Paired.count(PMul1->Root))
          continue;
  
@@ -682,8 +659,8 @@ void ARMParallelDSP::InsertParallelMACs(Reduction &R) {
    LLVM_DEBUG(dbgs() << "Root: " << *InsertAfter << "\n"
               << "Acc: " << *Acc << "\n");
    for (auto &Pair : R.getMulPairs()) {
-    BinOpChain *PMul0 = Pair.first;
-    BinOpChain *PMul1 = Pair.second;
+    MulCandidate *PMul0 = Pair.first;
+    MulCandidate *PMul1 = Pair.second;
      LLVM_DEBUG(dbgs() << "Muls:\n"
                 << "- " << *PMul0->Root << "\n"
                 << "- " << *PMul1->Root << "\n");
author	Sam Parker <sam.parker@arm.com>
	Mon, 29 Jul 2019 08:41:51 +0000 (08:41 +0000)
committer	Sam Parker <sam.parker@arm.com>
	Mon, 29 Jul 2019 08:41:51 +0000 (08:41 +0000)