[DivergenceAnalysis] Add methods for querying divergence at use

author Jay Foad <jay.foad@gmail.com>

Mon, 29 Jul 2019 10:22:09 +0000 (10:22 +0000)

committer Jay Foad <jay.foad@gmail.com>

Mon, 29 Jul 2019 10:22:09 +0000 (10:22 +0000)
author Jay Foad <jay.foad@gmail.com>
Mon, 29 Jul 2019 10:22:09 +0000 (10:22 +0000)
committer Jay Foad <jay.foad@gmail.com>
Mon, 29 Jul 2019 10:22:09 +0000 (10:22 +0000)
diff --git a/include/llvm/Analysis/DivergenceAnalysis.h b/include/llvm/Analysis/DivergenceAnalysis.h

index 3cfb9d13df94d094760ee816d81a5229057ced09..2fac9c8b4b343732402bf71b1aa1f5e5ccefbc5e 100644 (file)
--- a/include/llvm/Analysis/DivergenceAnalysis.h
+++ b/include/llvm/Analysis/DivergenceAnalysis.h
@@ -73,9 +73,12 @@ public:
    /// operands
    bool isAlwaysUniform(const Value &Val) const;
  
-  /// \brief Whether \p Val is a divergent value
+  /// \brief Whether \p Val is divergent at its definition.
    bool isDivergent(const Value &Val) const;
  
+  /// \brief Whether \p U is divergent. Uses of a uniform value can be divergent.
+  bool isDivergentUse(const Use &U) const;
+
    void print(raw_ostream &OS, const Module *) const;
  
  private:
@@ -189,12 +192,19 @@ public:
    /// The GPU kernel this analysis result is for
    const Function &getFunction() const { return DA.getFunction(); }
  
-  /// Whether \p V is divergent.
+  /// Whether \p V is divergent at its definition.
    bool isDivergent(const Value &V) const;
  
-  /// Whether \p V is uniform/non-divergent
+  /// Whether \p U is divergent. Uses of a uniform value can be divergent.
+  bool isDivergentUse(const Use &U) const;
+
+  /// Whether \p V is uniform/non-divergent.
    bool isUniform(const Value &V) const { return !isDivergent(V); }
  
+  /// Whether \p U is uniform/non-divergent. Uses of a uniform value can be
+  /// divergent.
+  bool isUniformUse(const Use &U) const { return !isDivergentUse(U); }
+
    /// Print all divergent values in the kernel.
    void print(raw_ostream &OS, const Module *) const;
  };
diff --git a/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/include/llvm/Analysis/LegacyDivergenceAnalysis.h

index 0a338b816640e3e08ae015ef979687ecfa76b822..e33b8f4129f331b2721a2bd6ce9db01d5b2f299e 100644 (file)
--- a/include/llvm/Analysis/LegacyDivergenceAnalysis.h
+++ b/include/llvm/Analysis/LegacyDivergenceAnalysis.h
@@ -39,17 +39,18 @@ public:
    void print(raw_ostream &OS, const Module *) const override;
  
    // Returns true if V is divergent at its definition.
-  //
-  // Even if this function returns false, V may still be divergent when used
-  // in a different basic block.
    bool isDivergent(const Value *V) const;
  
+  // Returns true if U is divergent. Uses of a uniform value can be divergent.
+  bool isDivergentUse(const Use *U) const;
+
    // Returns true if V is uniform/non-divergent.
-  //
-  // Even if this function returns true, V may still be divergent when used
-  // in a different basic block.
    bool isUniform(const Value *V) const { return !isDivergent(V); }
  
+  // Returns true if U is uniform/non-divergent. Uses of a uniform value can be
+  // divergent.
+  bool isUniformUse(const Use *U) const { return !isDivergentUse(U); }
+
    // Keep the analysis results uptodate by removing an erased value.
    void removeValue(const Value *V) { DivergentValues.erase(V); }
  
@@ -62,6 +63,9 @@ private:
  
    // Stores all divergent values.
    DenseSet<const Value *> DivergentValues;
+
+  // Stores divergent uses of possibly uniform values.
+  DenseSet<const Use *> DivergentUses;
  };
  } // End llvm namespace
  
diff --git a/lib/Analysis/DivergenceAnalysis.cpp b/lib/Analysis/DivergenceAnalysis.cpp

index 0ccd59ef2bfd46134d79a3c855125862847383d3..3d1be1e1cce093aeec9e9114d8f3add8ae53f0b4 100644 (file)
--- a/lib/Analysis/DivergenceAnalysis.cpp
+++ b/lib/Analysis/DivergenceAnalysis.cpp
@@ -412,6 +412,12 @@ bool DivergenceAnalysis::isDivergent(const Value &V) const {
    return DivergentValues.find(&V) != DivergentValues.end();
  }
  
+bool DivergenceAnalysis::isDivergentUse(const Use &U) const {
+  Value &V = *U.get(); // used value; can be uniform at its definition
+  Instruction &I = *cast<Instruction>(U.getUser()); // the using instruction
+  return isDivergent(V) || isTemporalDivergent(*I.getParent(), V);
+}
+
  void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
    if (DivergentValues.empty())
      return;
@@ -449,6 +455,10 @@ bool GPUDivergenceAnalysis::isDivergent(const Value &val) const {
    return DA.isDivergent(val);
  }
  
+bool GPUDivergenceAnalysis::isDivergentUse(const Use &use) const {
+  return DA.isDivergentUse(use); // delegate to the DA member's use query
+}
+
  void GPUDivergenceAnalysis::print(raw_ostream &OS, const Module *mod) const {
    OS << "Divergence of kernel " << DA.getFunction().getName() << " {\n";
    DA.print(OS, mod);
diff --git a/lib/Analysis/LegacyDivergenceAnalysis.cpp b/lib/Analysis/LegacyDivergenceAnalysis.cpp

index 52212e1c42aa6ab2393abb8dc99a68b76f00df9e..2fd8b8d6e1de9cee82685f1b1c4a403564fb59a9 100644 (file)
--- a/lib/Analysis/LegacyDivergenceAnalysis.cpp
+++ b/lib/Analysis/LegacyDivergenceAnalysis.cpp
@@ -93,8 +93,9 @@ namespace {
  class DivergencePropagator {
  public:
    DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
-                       PostDominatorTree &PDT, DenseSet<const Value *> &DV)
-      : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {}
+                       PostDominatorTree &PDT, DenseSet<const Value *> &DV,
+                       DenseSet<const Use *> &DU)
+      : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV), DU(DU) {}
    void populateWithSourcesOfDivergence();
    void propagate();
  
@@ -118,11 +119,14 @@ private:
    PostDominatorTree &PDT;
    std::vector<Value *> Worklist; // Stack for DFS.
    DenseSet<const Value *> &DV;   // Stores all divergent values.
+  DenseSet<const Use *> &DU;   // Stores divergent uses of possibly uniform
+                               // values.
  };
  
  void DivergencePropagator::populateWithSourcesOfDivergence() {
    Worklist.clear();
    DV.clear();
+  DU.clear();
    for (auto &I : instructions(F)) {
      if (TTI.isSourceOfDivergence(&I)) {
        Worklist.push_back(&I);
@@ -197,8 +201,10 @@ void DivergencePropagator::exploreSyncDependency(Instruction *TI) {
    // dominators of TI until it is outside the influence region.
    BasicBlock *InfluencedBB = ThisBB;
    while (InfluenceRegion.count(InfluencedBB)) {
-    for (auto &I : *InfluencedBB)
-      findUsersOutsideInfluenceRegion(I, InfluenceRegion);
+    for (auto &I : *InfluencedBB) {
+      if (!DV.count(&I))
+        findUsersOutsideInfluenceRegion(I, InfluenceRegion);
+    }
      DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom();
      if (IDomNode == nullptr)
        break;
@@ -208,9 +214,10 @@ void DivergencePropagator::exploreSyncDependency(Instruction *TI) {
  
  void DivergencePropagator::findUsersOutsideInfluenceRegion(
      Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) {
-  for (User *U : I.users()) {
-    Instruction *UserInst = cast<Instruction>(U);
+  for (Use &Use : I.uses()) {
+    Instruction *UserInst = cast<Instruction>(Use.getUser());
      if (!InfluenceRegion.count(UserInst->getParent())) {
+      DU.insert(&Use);
        if (DV.insert(UserInst).second)
          Worklist.push_back(UserInst);
      }
@@ -320,6 +327,7 @@ bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
      return false;
  
    DivergentValues.clear();
+  DivergentUses.clear();
    gpuDA = nullptr;
  
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -332,7 +340,7 @@ bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
  
    } else {
      // run LLVM's existing DivergenceAnalysis
-    DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues);
+    DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues, DivergentUses);
      DP.populateWithSourcesOfDivergence();
      DP.propagate();
    }
@@ -351,6 +359,13 @@ bool LegacyDivergenceAnalysis::isDivergent(const Value *V) const {
    return DivergentValues.count(V);
  }
  
+bool LegacyDivergenceAnalysis::isDivergentUse(const Use *U) const {
+  if (gpuDA) { // gpuDA is set: delegate the query to it
+    return gpuDA->isDivergentUse(*U);
+  }  // otherwise consult the sets filled in by DivergencePropagator
+  return DivergentValues.count(U->get()) || DivergentUses.count(U);
+}
+
  void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
    if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty())
      return;
diff --git a/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp

index de974011a917fc74ad05689f59b011991ea22446..92d94ee894a5bb4ba3140b61c07b901675224a7f 100644 (file)
--- a/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -142,11 +142,11 @@ void AMDGPUAtomicOptimizer::visitAtomicRMWInst(AtomicRMWInst &I) {
  
    // If the pointer operand is divergent, then each lane is doing an atomic
    // operation on a different address, and we cannot optimize that.
-  if (DA->isDivergent(I.getOperand(PtrIdx))) {
+  if (DA->isDivergentUse(&I.getOperandUse(PtrIdx))) {
      return;
    }
  
-  const bool ValDivergent = DA->isDivergent(I.getOperand(ValIdx));
+  const bool ValDivergent = DA->isDivergentUse(&I.getOperandUse(ValIdx));
  
    // If the value operand is divergent, each lane is contributing a different
    // value to the atomic calculation. We can only optimize divergent values if
@@ -219,7 +219,7 @@ void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) {
  
    const unsigned ValIdx = 0;
  
-  const bool ValDivergent = DA->isDivergent(I.getOperand(ValIdx));
+  const bool ValDivergent = DA->isDivergentUse(&I.getOperandUse(ValIdx));
  
    // If the value operand is divergent, each lane is contributing a different
    // value to the atomic calculation. We can only optimize divergent values if
@@ -232,7 +232,7 @@ void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) {
    // If any of the other arguments to the intrinsic are divergent, we can't
    // optimize the operation.
    for (unsigned Idx = 1; Idx < I.getNumOperands(); Idx++) {
-    if (DA->isDivergent(I.getOperand(Idx))) {
+    if (DA->isDivergentUse(&I.getOperandUse(Idx))) {
        return;
      }
    }
diff --git a/test/CodeGen/AMDGPU/divergence-at-use.ll b/test/CodeGen/AMDGPU/divergence-at-use.ll

new file mode 100644 (file)

index 0000000..1aba1e4
--- /dev/null
+++ b/test/CodeGen/AMDGPU/divergence-at-use.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizations=true < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizations=true < %s -use-gpu-divergence-analysis | FileCheck %s
+
+@local = addrspace(3) global i32 undef
+
+define void @reducible(i32 %x) {
+; CHECK-LABEL: reducible:
+; CHECK-NOT: dpp
+entry:
+  br label %loop
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i1, %loop ]
+  %gep = getelementptr i32, i32 addrspace(3)* @local, i32 %i ; defined in loop
+  %cond = icmp ult i32 %i, %x
+  %i1 = add i32 %i, 1
+  br i1 %cond, label %loop, label %exit ; loop exit depends on %x
+exit:
+  %old = atomicrmw add i32 addrspace(3)* %gep, i32 %x acq_rel ; use after loop
+  ret void
+}
author	Jay Foad <jay.foad@gmail.com>
	Mon, 29 Jul 2019 10:22:09 +0000 (10:22 +0000)
committer	Jay Foad <jay.foad@gmail.com>
	Mon, 29 Jul 2019 10:22:09 +0000 (10:22 +0000)
include/llvm/Analysis/DivergenceAnalysis.h		patch \| blob \| history
include/llvm/Analysis/LegacyDivergenceAnalysis.h		patch \| blob \| history
lib/Analysis/DivergenceAnalysis.cpp		patch \| blob \| history
lib/Analysis/LegacyDivergenceAnalysis.cpp		patch \| blob \| history
lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp		patch \| blob \| history
test/CodeGen/AMDGPU/divergence-at-use.ll	[new file with mode: 0644]	patch \| blob