bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
TargetLibraryInfo *, TargetTransformInfo *, Loop *,
AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
- OptimizationRemarkEmitter *ORE);
+ bool, OptimizationRemarkEmitter *);
/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in depth first order
/// w.r.t the DominatorTree. This allows us to visit definitions before uses,
/// allowing us to hoist a loop body in one pass without iteration. Diagnostics
/// are emitted via ORE. It returns changed status.
bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
TargetLibraryInfo *, Loop *, AliasSetTracker *,
- MemorySSAUpdater *, ICFLoopSafetyInfo *,
- OptimizationRemarkEmitter *ORE);
+ MemorySSAUpdater *, ICFLoopSafetyInfo *, bool,
+ OptimizationRemarkEmitter *);
/// This function deletes dead loops. The caller of this function needs to
/// guarantee that the loop is in fact dead.
bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
Loop *CurLoop, AliasSetTracker *CurAST,
MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop,
+ bool NoOfMemAccessesTooLarge,
OptimizationRemarkEmitter *ORE = nullptr);
/// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
std::unique_ptr<AliasSetTracker> CurAST;
std::unique_ptr<MemorySSAUpdater> MSSAU;
- bool LocalDisablePromotion = false;
+ bool NoOfMemAccTooLarge = false;
+
if (!MSSA) {
LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
CurAST = collectAliasInfoForLoop(L, LI, AA);
(void)MA;
AccessCapCount++;
if (AccessCapCount > AccessCapForMSSAPromotion) {
- LocalDisablePromotion = true;
+ NoOfMemAccTooLarge = true;
break;
}
}
}
- if (LocalDisablePromotion)
+ if (NoOfMemAccTooLarge)
break;
}
}
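// For illustration only (not part of this patch): the counting above amounts
// to the standalone predicate below, a minimal sketch assuming LLVM's
// MemorySSA API. The helper name is hypothetical; Cap stands in for the
// AccessCapForMSSAPromotion option.
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemorySSA.h"

static bool exceedsAccessCap(const llvm::Loop *L, llvm::MemorySSA *MSSA,
                             unsigned Cap) {
  unsigned Count = 0;
  for (llvm::BasicBlock *BB : L->getBlocks())
    if (const auto *Accesses = MSSA->getBlockAccesses(BB))
      for (const llvm::MemoryAccess &MA : *Accesses) {
        (void)MA; // Only the number of accesses matters, not their contents.
        if (++Count > Cap)
          return true; // Over the cap: promotion is disabled for this loop.
      }
  return false;
}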
//
if (L->hasDedicatedExits())
Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
- CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
+ CurAST.get(), MSSAU.get(), &SafetyInfo,
+ NoOfMemAccTooLarge, ORE);
if (Preheader)
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
- CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
+ CurAST.get(), MSSAU.get(), &SafetyInfo,
+ NoOfMemAccTooLarge, ORE);
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.
// Don't sink stores from loops without dedicated block exits. Exits
// containing indirect branches are not transformed by loop simplify,
// make sure we catch that. An additional load may be generated in the
// preheader for SSA updater, so also avoid sinking when no preheader
// is available.
if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
- !LocalDisablePromotion) {
+ !NoOfMemAccTooLarge) {
// Figure out the loop exits and their insertion points
SmallVector<BasicBlock *, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
DominatorTree *DT, TargetLibraryInfo *TLI,
TargetTransformInfo *TTI, Loop *CurLoop,
AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
- ICFLoopSafetyInfo *SafetyInfo,
+ ICFLoopSafetyInfo *SafetyInfo, bool NoOfMemAccTooLarge,
OptimizationRemarkEmitter *ORE) {
// Verify inputs.
//
bool FreeInLoop = false;
if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
- canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, ORE) &&
+ canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true,
+ NoOfMemAccTooLarge, ORE) &&
!I.mayHaveSideEffects()) {
if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE, FreeInLoop)) {
if (!FreeInLoop) {
bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
- ICFLoopSafetyInfo *SafetyInfo,
+ ICFLoopSafetyInfo *SafetyInfo, bool NoOfMemAccTooLarge,
OptimizationRemarkEmitter *ORE) {
// Verify inputs.
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
// and we have accurately duplicated the control flow from the loop header
// to that block.
if (CurLoop->hasLoopInvariantOperands(&I) &&
- canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, ORE) &&
+ canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true,
+ NoOfMemAccTooLarge, ORE) &&
isSafeToExecuteUnconditionally(
I, DT, CurLoop, SafetyInfo, ORE,
CurLoop->getLoopPreheader()->getTerminator())) {
Loop *CurLoop, AliasSetTracker *CurAST,
MemorySSAUpdater *MSSAU,
bool TargetExecutesOncePerLoop,
+ bool NoOfMemAccTooLarge,
OptimizationRemarkEmitter *ORE) {
// If we don't understand the instruction, bail early.
if (!isHoistableAndSinkableInst(I))
return false;
if (!EnableLicmCap) {
auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
- if (MSSA->isLiveOnEntryDef(Source) ||
- !CurLoop->contains(Source->getBlock()))
- return true;
+ // If there are no clobbering Defs in the loop, we still need to check
+ // for interfering Uses. If there are more accesses than the promotion
+ // cap, give up; we're not walking a list that long. Otherwise, walk the
+ // list and check whether each Use is optimized to an access outside the
+ // loop. If so, the store is safe to hoist. This is fairly restrictive,
+ // but conservatively correct.
+ // TODO: Cache set of Uses on the first walk in runOnLoop, update when
+ // moving accesses. Can also extend to dominating uses.
+ if ((!MSSA->isLiveOnEntryDef(Source) &&
+ CurLoop->contains(Source->getBlock())) ||
+ NoOfMemAccTooLarge)
+ return false;
+ for (auto *BB : CurLoop->getBlocks())
+ if (auto *Accesses = MSSA->getBlockAccesses(BB))
+ for (const auto &MA : *Accesses)
+ if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
+ auto *MD = MU->getDefiningAccess();
+ if (!MSSA->isLiveOnEntryDef(MD) &&
+ CurLoop->contains(MD->getBlock()))
+ return false;
+ }
+ return true;
}
return false;
}
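// For illustration only (not part of this patch): the Use-walk described in
// the comment above, written as a standalone predicate. It is a sketch
// assuming LLVM's MemorySSA API; the helper name is hypothetical. It returns
// true when every load in the loop is defined by an access that is
// live-on-entry or outside the loop.
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemorySSA.h"

static bool noLoopInternalReads(const llvm::Loop *L, llvm::MemorySSA *MSSA) {
  for (llvm::BasicBlock *BB : L->getBlocks())
    if (const auto *Accesses = MSSA->getBlockAccesses(BB))
      for (const llvm::MemoryAccess &MA : *Accesses)
        if (const auto *MU = llvm::dyn_cast<llvm::MemoryUse>(&MA)) {
          llvm::MemoryAccess *MD = MU->getDefiningAccess();
          if (!MSSA->isLiveOnEntryDef(MD) && L->contains(MD->getBlock()))
            return false; // A load in the loop reads a loop-internal store.
        }
  return true; // No Use in the loop depends on an in-loop Def.
}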
-; RUN: opt -S -basicaa -licm %s | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s
+; RUN: opt -S -basicaa -licm %s | FileCheck -check-prefixes=CHECK,AST %s
+; RUN: opt -S -basicaa -licm -enable-mssa-loop-dependency=true %s | FileCheck -check-prefixes=CHECK,MSSA %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck -check-prefixes=CHECK,AST %s
define void @test(i32* %loc) {
; CHECK-LABEL: @test
define i32* @false_negative_2use(i32* %loc) {
; CHECK-LABEL: @false_negative_2use
-; CHECK-LABEL: exit:
-; CHECK: store i32 0, i32* %loc
+; AST-LABEL: exit:
+; AST: store i32 0, i32* %loc
+; MSSA-LABEL: entry:
+; MSSA: store i32 0, i32* %loc
+; MSSA-LABEL: loop:
entry:
br label %loop
ret void
}
+; Hoisting the store is actually valid here, as it dominates the load.
define void @neg_ref(i32* %loc) {
; CHECK-LABEL: @neg_ref
; CHECK-LABEL: exit1:
ret void
}
+; Hoisting the store here leads to a miscompile: on the first iteration the
+; load must observe the 198 stored in the entry block, not the 0 stored
+; inside the loop.
+define void @neg_ref2(i32* %loc) {
+; CHECK-LABEL: @neg_ref2
+; CHECK-LABEL: exit1:
+; CHECK: store i32 0, i32* %loc
+; CHECK-LABEL: exit2:
+; CHECK: store i32 0, i32* %loc
+entry:
+ store i32 198, i32* %loc
+ br label %loop
+
+loop:
+ %iv = phi i32 [0, %entry], [%iv.next, %backedge]
+ %v = load i32, i32* %loc
+ store i32 0, i32* %loc
+ %earlycnd = icmp eq i32 %v, 198
+ br i1 %earlycnd, label %exit1, label %backedge
+
+backedge:
+ %iv.next = add i32 %iv, 1
+ %cmp = icmp slt i32 %iv, 200
+ br i1 %cmp, label %loop, label %exit2
+
+exit1:
+ ret void
+exit2:
+ ret void
+}
+
declare void @modref()
define void @neg_modref(i32* %loc) {