Reapply "[GVN] Prevent LoadPRE from hoisting across instructions that don't pass...

author Max Kazantsev <max.kazantsev@azul.com>

Tue, 31 Oct 2017 05:07:56 +0000 (05:07 +0000)

committer Max Kazantsev <max.kazantsev@azul.com>

Tue, 31 Oct 2017 05:07:56 +0000 (05:07 +0000)
author Max Kazantsev <max.kazantsev@azul.com>
Tue, 31 Oct 2017 05:07:56 +0000 (05:07 +0000)
committer Max Kazantsev <max.kazantsev@azul.com>
Tue, 31 Oct 2017 05:07:56 +0000 (05:07 +0000)
diff --git a/include/llvm/Transforms/Scalar/GVN.h b/include/llvm/Transforms/Scalar/GVN.h

index 251492a4e5d390688528a95076c370858f65cf0a..440d3f67c35a765dd08d49230c0bb81b42f8ba34 100644 (file)
--- a/include/llvm/Transforms/Scalar/GVN.h
+++ b/include/llvm/Transforms/Scalar/GVN.h
@@ -18,6 +18,7 @@
  
  #include "llvm/ADT/DenseMap.h"
  #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
  #include "llvm/ADT/SetVector.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Analysis/AliasAnalysis.h"
@@ -27,6 +28,7 @@
  #include "llvm/IR/PassManager.h"
  #include "llvm/Support/Allocator.h"
  #include "llvm/Support/Compiler.h"
+#include "llvm/Transforms/Utils/OrderedInstructions.h"
  #include <cstdint>
  #include <utility>
  #include <vector>
@@ -156,7 +158,11 @@ private:
    AssumptionCache *AC;
    SetVector<BasicBlock *> DeadBlocks;
    OptimizationRemarkEmitter *ORE;
+  // Maps a block to the topmost instruction with implicit control flow in it.
+  DenseMap<const BasicBlock *, const Instruction *>
+      FirstImplicitControlFlowInsts;
  
+  OrderedInstructions *OI;
    ValueTable VN;
  
    /// A mapping from value numbers to lists of Value*'s that
@@ -268,6 +274,7 @@ private:
                                   BasicBlock *Curr, unsigned int ValNo);
    Value *findLeader(const BasicBlock *BB, uint32_t num);
    void cleanupGlobalSets();
+  void fillImplicitControlFlowInfo(BasicBlock *BB);
    void verifyRemoved(const Instruction *I) const;
    bool splitCriticalEdges();
    BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ);
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp

index 7e416368468746937a54c3ff4b5778d8907b0d67..45b44368a3d3974ad4a0e87ac1810193bba449cf 100644 (file)
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -38,6 +38,7 @@
  #include "llvm/Analysis/OptimizationRemarkEmitter.h"
  #include "llvm/Analysis/PHITransAddr.h"
  #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
  #include "llvm/IR/Attributes.h"
  #include "llvm/IR/BasicBlock.h"
  #include "llvm/IR/CallSite.h"
@@ -1048,7 +1049,32 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
    // backwards through predecessors if needed.
    BasicBlock *LoadBB = LI->getParent();
    BasicBlock *TmpBB = LoadBB;
+  bool IsSafeToSpeculativelyExecute = isSafeToSpeculativelyExecute(LI);
  
+  // Check that there is no implicit control flow instructions above our load in
+  // its block. If there is an instruction that doesn't always pass the
+  // execution to the following instruction, then moving through it may become
+  // invalid. For example:
+  //
+  // int arr[LEN];
+  // int index = ???;
+  // ...
+  // guard(0 <= index && index < LEN);
+  // use(arr[index]);
+  //
+  // It is illegal to move the array access to any point above the guard,
+  // because if the index is out of bounds we should deoptimize rather than
+  // access the array.
+  // Check that there is no guard in this block above our intruction.
+  if (!IsSafeToSpeculativelyExecute) {
+    auto It = FirstImplicitControlFlowInsts.find(TmpBB);
+    if (It != FirstImplicitControlFlowInsts.end()) {
+      assert(It->second->getParent() == TmpBB &&
+             "Implicit control flow map broken?");
+      if (OI->dominates(It->second, LI))
+        return false;
+    }
+  }
    while (TmpBB->getSinglePredecessor()) {
      TmpBB = TmpBB->getSinglePredecessor();
      if (TmpBB == LoadBB) // Infinite (unreachable) loop.
@@ -1063,6 +1089,11 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
      // which it was not previously executed.
      if (TmpBB->getTerminator()->getNumSuccessors() != 1)
        return false;
+
+    // Check that there is no implicit control flow in a block above.
+    if (!IsSafeToSpeculativelyExecute &&
+        FirstImplicitControlFlowInsts.count(TmpBB))
+      return false;
    }
  
    assert(TmpBB);
@@ -1982,6 +2013,8 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
    TLI = &RunTLI;
    VN.setAliasAnalysis(&RunAA);
    MD = RunMD;
+  OrderedInstructions OrderedInstrs(DT);
+  OI = &OrderedInstrs;
    VN.setMemDep(MD);
    ORE = RunORE;
  
@@ -2064,14 +2097,26 @@ bool GVN::processBlock(BasicBlock *BB) {
      if (!AtStart)
        --BI;
  
+    bool InvalidateImplicitCF = false;
+    const Instruction *MaybeFirstICF = FirstImplicitControlFlowInsts.lookup(BB);
      for (auto *I : InstrsToErase) {
        assert(I->getParent() == BB && "Removing instruction from wrong block?");
        DEBUG(dbgs() << "GVN removed: " << *I << '\n');
        if (MD) MD->removeInstruction(I);
        DEBUG(verifyRemoved(I));
+      if (MaybeFirstICF == I) {
+        // We have erased the first ICF in block. The map needs to be updated.
+        InvalidateImplicitCF = true;
+        // Do not keep dangling pointer on the erased instruction.
+        MaybeFirstICF = nullptr;
+      }
        I->eraseFromParent();
      }
+
+    OI->invalidateBlock(BB);
      InstrsToErase.clear();
+    if (InvalidateImplicitCF)
+      fillImplicitControlFlowInfo(BB);
  
      if (AtStart)
        BI = BB->begin();
@@ -2265,7 +2310,14 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
    if (MD)
      MD->removeInstruction(CurInst);
    DEBUG(verifyRemoved(CurInst));
+  bool InvalidateImplicitCF =
+      FirstImplicitControlFlowInsts.lookup(CurInst->getParent()) == CurInst;
+  // FIXME: Intended to be markInstructionForDeletion(CurInst), but it causes
+  // some assertion failures.
+  OI->invalidateBlock(CurrentBlock);
    CurInst->eraseFromParent();
+  if (InvalidateImplicitCF)
+    fillImplicitControlFlowInfo(CurrentBlock);
    ++NumGVNInstr;
  
    return true;
@@ -2332,6 +2384,9 @@ bool GVN::iterateOnFunction(Function &F) {
    // RPOT walks the graph in its constructor and will not be invalidated during
    // processBlock.
    ReversePostOrderTraversal<Function *> RPOT(&F);
+
+  for (BasicBlock *BB : RPOT)
+    fillImplicitControlFlowInfo(BB);
    for (BasicBlock *BB : RPOT)
      Changed |= processBlock(BB);
  
@@ -2343,6 +2398,48 @@ void GVN::cleanupGlobalSets() {
    LeaderTable.clear();
    BlockRPONumber.clear();
    TableAllocator.Reset();
+  FirstImplicitControlFlowInsts.clear();
+}
+
+void
+GVN::fillImplicitControlFlowInfo(BasicBlock *BB) {
+  // Make sure that all marked instructions are actually deleted by this point,
+  // so that we don't need to care about omitting them.
+  assert(InstrsToErase.empty() && "Filling before removed all marked insns?");
+  auto MayNotTransferExecutionToSuccessor = [&](const Instruction *I) {
+    // If a block's instruction doesn't always pass the control to its successor
+    // instruction, mark the block as having implicit control flow. We use them
+    // to avoid wrong assumptions of sort "if A is executed and B post-dominates
+    // A, then B is also executed". This is not true is there is an implicit
+    // control flow instruction (e.g. a guard) between them.
+    //
+    // TODO: Currently, isGuaranteedToTransferExecutionToSuccessor returns false
+    // for volatile stores and loads because they can trap. The discussion on
+    // whether or not it is correct is still ongoing. We might want to get rid
+    // of this logic in the future. Anyways, trapping instructions shouldn't
+    // introduce implicit control flow, so we explicitly allow them here. This
+    // must be removed once isGuaranteedToTransferExecutionToSuccessor is fixed.
+    if (isGuaranteedToTransferExecutionToSuccessor(I))
+      return false;
+    if (auto *LI = dyn_cast<LoadInst>(I)) {
+      assert(LI->isVolatile() && "Non-volatile load should transfer execution"
+                                 " to successor!");
+      return false;
+    }
+    if (auto *SI = dyn_cast<StoreInst>(I)) {
+      assert(SI->isVolatile() && "Non-volatile store should transfer execution"
+                                 " to successor!");
+      return false;
+    }
+    return true;
+  };
+  FirstImplicitControlFlowInsts.erase(BB);
+
+  for (auto &I : *BB)
+    if (MayNotTransferExecutionToSuccessor(&I)) {
+      FirstImplicitControlFlowInsts[BB] = &I;
+      break;
+    }
  }
  
  /// Verify that the specified instruction does not occur in our
diff --git a/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll b/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll

index 5fbb0fcc511db1833751dfd974d25beab53129c0..14f65a4a24a8c3f2a002118a4983f50bacf86afe 100644 (file)
--- a/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll
+++ b/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll
@@ -17,6 +17,13 @@ declare i64* @getaddr_i64(i64 addrspace(100)*) #0
  define hidden void @wrapon_fn173() {
  
  ; CHECK-LABEL: @wrapon_fn173
+; CHECK:       entry:
+; CHECK-NEXT:    call %ArrayImpl* @getaddr_ArrayImpl(%ArrayImpl addrspace(100)* undef)
+; CHECK-NEXT:    %.pre = load i64 addrspace(100)*, i64 addrspace(100)** null, align 8
+; CHECK-NEXT:    br label %loop
+; CHECK:       loop:
+; CHECK-NEXT:    call i64* @getaddr_i64(i64 addrspace(100)* %.pre)
+; CHECK-NEXT:    br label %loop
  
  entry:
    %0 = call %ArrayImpl* @getaddr_ArrayImpl(%ArrayImpl addrspace(100)* undef)
diff --git a/test/Transforms/GVN/PRE/pre-load-guards.ll b/test/Transforms/GVN/PRE/pre-load-guards.ll

new file mode 100644 (file)

index 0000000..eeb3ef6
--- /dev/null
+++ b/test/Transforms/GVN/PRE/pre-load-guards.ll
@@ -0,0 +1,146 @@
+; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+declare void @llvm.experimental.guard(i1, ...)
+
+; This is a motivating example on why we prohibit hoisting through guards.
+; In the bottom block, we check that the index is within bounds and only access
+; the element in this case and deoptimize otherwise. If we hoist the load to a
+; place above the guard, it will may lead to out-of-bound array access.
+define i32 @test_motivation(i32* %p, i32* %q, i1 %C, i32 %index, i32 %len) {
+; CHECK-LABEL: @test_motivation(
+block1:
+  %el1 = getelementptr inbounds i32, i32* %q, i32 %index
+  %el2 = getelementptr inbounds i32, i32* %p, i32 %index
+       br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK:        block2:
+; CHECK-NEXT:     br
+; CHECK-NOT:      load
+; CHECK-NOT:      sge
+; CHECK-NOT:      slt
+; CHECK-NOT:      and
+  br label %block4
+
+block3:
+  store i32 0, i32* %el1
+  br label %block4
+
+block4:
+
+; CHECK:        block4:
+; CHECK:          %cond1 = icmp sge i32 %index, 0
+; CHECK-NEXT:     %cond2 = icmp slt i32 %index, %len
+; CHECK-NEXT:     %in.bounds = and i1 %cond1, %cond2
+; CHECK:          call void (i1, ...) @llvm.experimental.guard(i1 %in.bounds)
+; CHECK-NEXT:     %PRE = load i32, i32* %P2
+; CHECK:          ret i32 %PRE
+
+  %P2 = phi i32* [%el2, %block3], [%el1, %block2]
+  %cond1 = icmp sge i32 %index, 0
+  %cond2 = icmp slt i32 %index, %len
+  %in.bounds = and i1 %cond1, %cond2
+  call void (i1, ...) @llvm.experimental.guard(i1 %in.bounds) [ "deopt"() ]
+  %PRE = load i32, i32* %P2
+  ret i32 %PRE
+}
+
+; Guard in load's block that is above the load should prohibit the PRE.
+define i32 @test_guard_01(i32* %p, i32* %q, i1 %C, i1 %G) {
+; CHECK-LABEL: @test_guard_01(
+block1:
+       br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK:        block2:
+; CHECK-NEXT:     br
+; CHECK-NOT:      load
+
+ br label %block4
+
+block3:
+  store i32 0, i32* %p
+  br label %block4
+
+block4:
+
+; CHECK:        block4:
+; CHECK:          call void (i1, ...) @llvm.experimental.guard(i1 %G)
+; CHECK-NEXT:     load
+; CHECK:          ret i32
+
+  %P2 = phi i32* [%p, %block3], [%q, %block2]
+  call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
+  %PRE = load i32, i32* %P2
+  ret i32 %PRE
+}
+
+; Guard in load's block that is below the load should not prohibit the PRE.
+define i32 @test_guard_02(i32* %p, i32* %q, i1 %C, i1 %G) {
+; CHECK-LABEL: @test_guard_02(
+block1:
+       br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK:        block2:
+; CHECK-NEXT:     load i32, i32* %q
+
+ br label %block4
+
+block3:
+  store i32 0, i32* %p
+  br label %block4
+
+block4:
+
+; CHECK:        block4:
+; CHECK-NEXT:     phi i32 [
+; CHECK-NEXT:     phi i32* [
+; CHECK-NEXT:     call void (i1, ...) @llvm.experimental.guard(i1 %G)
+; CHECK-NOT:      load
+; CHECK:          ret i32
+
+  %P2 = phi i32* [%p, %block3], [%q, %block2]
+  %PRE = load i32, i32* %P2
+  call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
+  ret i32 %PRE
+}
+
+; Guard above the load's block should prevent PRE from hoisting through it.
+define i32 @test_guard_03(i32* %p, i32* %q, i1 %C, i1 %G) {
+; CHECK-LABEL: @test_guard_03(
+block1:
+       br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK:        block2:
+; CHECK-NEXT:     br
+; CHECK-NOT:      load
+
+ br label %block4
+
+block3:
+  store i32 0, i32* %p
+  br label %block4
+
+block4:
+
+; CHECK:        block4:
+; CHECK-NEXT:     phi i32*
+; CHECK-NEXT:     call void (i1, ...) @llvm.experimental.guard(i1 %G)
+; CHECK-NEXT:     load
+; CHECK-NEXT:     ret i32
+
+  %P2 = phi i32* [%p, %block3], [%q, %block2]
+  call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
+  br label %block5
+
+block5:
+  %PRE = load i32, i32* %P2
+  ret i32 %PRE
+}
diff --git a/test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll b/test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll

new file mode 100644 (file)

index 0000000..c131e92
--- /dev/null
+++ b/test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll
@@ -0,0 +1,118 @@
+; RUN: opt -S -gvn -enable-load-pre < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; These tests exercise situations when instructions that were first instructions
+; with implicit control flow get removed. We make sure that after that we don't
+; face crashes and are still able to perform PRE correctly.
+
+declare i32 @foo(i32 %arg) #0
+
+define hidden void @test_01(i32 %x, i32 %y) {
+
+; c2 only throws if c1 throws, so it can be safely removed and then PRE can
+; hoist the load out of loop.
+
+; CHECK-LABEL: @test_01
+; CHECK:       entry:
+; CHECK-NEXT:    %c1 = call i32 @foo(i32 %x)
+; CHECK-NEXT:    %val.pre = load i32, i32* null, align 8
+; CHECK-NEXT:    br label %loop
+; CHECK:       loop:
+; CHECK-NEXT:    %c3 = call i32 @foo(i32 %val.pre)
+; CHECK-NEXT:    br label %loop
+
+entry:
+  %c1 = call i32 @foo(i32 %x)
+  br label %loop
+
+loop:
+  %c2 = call i32 @foo(i32 %x)
+  %val = load i32, i32* null, align 8
+  %c3 = call i32 @foo(i32 %val)
+  br label %loop
+}
+
+define hidden void @test_02(i32 %x, i32 %y) {
+
+; PRE is not allowed because c2 may throw.
+
+; CHECK-LABEL: @test_02
+; CHECK:       entry:
+; CHECK-NEXT:    %c1 = call i32 @foo(i32 %x)
+; CHECK-NEXT:    br label %loop
+; CHECK:       loop:
+; CHECK-NEXT:    %c2 = call i32 @foo(i32 %y)
+; CHECK-NEXT:    %val = load i32, i32* null, align 8
+; CHECK-NEXT:    %c3 = call i32 @foo(i32 %val)
+; CHECK-NEXT:    br label %loop
+
+entry:
+  %c1 = call i32 @foo(i32 %x)
+  br label %loop
+
+loop:
+  %c2 = call i32 @foo(i32 %y)
+  %val = load i32, i32* null, align 8
+  %c3 = call i32 @foo(i32 %val)
+  br label %loop
+}
+
+define hidden void @test_03(i32 %x, i32 %y) {
+
+; PRE of load is allowed because c2 only throws if c1 throws. c3 should
+; not be eliminated. c4 is eliminated because it only throws if c3 throws.
+
+; CHECK-LABEL: @test_03
+; CHECK:       entry:
+; CHECK-NEXT:    %c1 = call i32 @foo(i32 %x)
+; CHECK-NEXT:    %val.pre = load i32, i32* null, align 8
+; CHECK-NEXT:    br label %loop
+; CHECK:       loop:
+; CHECK-NEXT:    %c3 = call i32 @foo(i32 %y)
+; CHECK-NEXT:    %c5 = call i32 @foo(i32 %val.pre)
+; CHECK-NEXT:    br label %loop
+
+entry:
+  %c1 = call i32 @foo(i32 %x)
+  br label %loop
+
+loop:
+  %c2 = call i32 @foo(i32 %x)
+  %val = load i32, i32* null, align 8
+  %c3 = call i32 @foo(i32 %y)
+  %val2 = load i32, i32* null, align 8
+  %c4 = call i32 @foo(i32 %y)
+  %c5 = call i32 @foo(i32 %val)
+  br label %loop
+}
+
+define hidden void @test_04(i32 %x, i32 %y) {
+
+; PRE is not allowed even after we remove c2 because now c3 prevents us from it.
+
+; CHECK-LABEL: @test_04
+; CHECK:       entry:
+; CHECK-NEXT:    %c1 = call i32 @foo(i32 %x)
+; CHECK-NEXT:    br label %loop
+; CHECK:       loop:
+; CHECK-NEXT:    %c3 = call i32 @foo(i32 %y)
+; CHECK-NEXT:    %val = load i32, i32* null, align 8
+; CHECK-NEXT:    %c5 = call i32 @foo(i32 %val)
+; CHECK-NEXT:    br label %loop
+
+entry:
+  %c1 = call i32 @foo(i32 %x)
+  br label %loop
+
+loop:
+  %c2 = call i32 @foo(i32 %x)
+  %c3 = call i32 @foo(i32 %y)
+  %val = load i32, i32* null, align 8
+  %c4 = call i32 @foo(i32 %y)
+  %c5 = call i32 @foo(i32 %val)
+  br label %loop
+}
+
+attributes #0 = { readnone }
diff --git a/test/Transforms/GVN/PRE/pre-load.ll b/test/Transforms/GVN/PRE/pre-load.ll

index ffff2b7f08e53c58a2f7d3988784bb6d97e603b4..1d050f4b6200f574ab6d2e31bdba293fcf132480 100644 (file)
--- a/test/Transforms/GVN/PRE/pre-load.ll
+++ b/test/Transforms/GVN/PRE/pre-load.ll
@@ -430,3 +430,161 @@ cleanup2:
    call void @g(i32 %NOTPRE)
    cleanupret from %c2 unwind to caller
  }
+
+; Don't PRE load across potentially throwing calls.
+
+define i32 @test13(i32* noalias nocapture readonly %x, i32* noalias nocapture %r, i32 %a) {
+
+; CHECK-LABEL: @test13(
+; CHECK: entry:
+; CHECK-NEXT: icmp eq
+; CHECK-NEXT: br i1
+
+entry:
+  %tobool = icmp eq i32 %a, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: if.then:
+; CHECK-NEXT: load i32
+; CHECK-NEXT: store i32
+
+if.then:
+  %uu = load i32, i32* %x, align 4
+  store i32 %uu, i32* %r, align 4
+  br label %if.end
+
+; CHECK: if.end:
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: load i32
+
+if.end:
+  call void @f()
+  %vv = load i32, i32* %x, align 4
+  ret i32 %vv
+}
+
+; Same as test13, but now the blocking function is not immediately in load's
+; block.
+
+define i32 @test14(i32* noalias nocapture readonly %x, i32* noalias nocapture %r, i32 %a) {
+
+; CHECK-LABEL: @test14(
+; CHECK: entry:
+; CHECK-NEXT: icmp eq
+; CHECK-NEXT: br i1
+
+entry:
+  %tobool = icmp eq i32 %a, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: if.then:
+; CHECK-NEXT: load i32
+; CHECK-NEXT: store i32
+
+if.then:
+  %uu = load i32, i32* %x, align 4
+  store i32 %uu, i32* %r, align 4
+  br label %if.end
+
+; CHECK: if.end:
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: load i32
+
+if.end:
+  call void @f()
+  br label %follow_1
+
+follow_1:
+  br label %follow_2
+
+follow_2:
+  %vv = load i32, i32* %x, align 4
+  ret i32 %vv
+}
+
+; Same as test13, but %x here is dereferenceable. A pointer that is
+; dereferenceable can be loaded from speculatively without a risk of trapping.
+; Since it is OK to speculate, PRE is allowed.
+
+define i32 @test15(i32* noalias nocapture readonly dereferenceable(8) %x, i32* noalias nocapture %r, i32 %a) {
+
+; CHECK-LABEL: @test15
+; CHECK: entry:
+; CHECK-NEXT: icmp eq
+; CHECK-NEXT: br i1
+
+entry:
+  %tobool = icmp eq i32 %a, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: entry.if.end_crit_edge:
+; CHECK-NEXT: %vv.pre = load i32, i32* %x, align 4
+; CHECK-NEXT: br label %if.end
+
+if.then:
+  %uu = load i32, i32* %x, align 4
+  store i32 %uu, i32* %r, align 4
+  br label %if.end
+
+; CHECK: if.then:
+; CHECK-NEXT: %uu = load i32, i32* %x, align 4
+; CHECK-NEXT: store i32 %uu, i32* %r, align 4
+; CHECK-NEXT: br label %if.end
+
+if.end:
+  call void @f()
+  %vv = load i32, i32* %x, align 4
+  ret i32 %vv
+
+; CHECK: if.end:
+; CHECK-NEXT: %vv = phi i32 [ %vv.pre, %entry.if.end_crit_edge ], [ %uu, %if.then ]
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: ret i32 %vv
+
+}
+
+; Same as test14, but %x here is dereferenceable. A pointer that is
+; dereferenceable can be loaded from speculatively without a risk of trapping.
+; Since it is OK to speculate, PRE is allowed.
+
+define i32 @test16(i32* noalias nocapture readonly dereferenceable(8) %x, i32* noalias nocapture %r, i32 %a) {
+
+; CHECK-LABEL: @test16(
+; CHECK: entry:
+; CHECK-NEXT: icmp eq
+; CHECK-NEXT: br i1
+
+entry:
+  %tobool = icmp eq i32 %a, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: entry.if.end_crit_edge:
+; CHECK-NEXT: %vv.pre = load i32, i32* %x, align 4
+; CHECK-NEXT: br label %if.end
+
+if.then:
+  %uu = load i32, i32* %x, align 4
+  store i32 %uu, i32* %r, align 4
+  br label %if.end
+
+; CHECK: if.then:
+; CHECK-NEXT: %uu = load i32, i32* %x, align 4
+; CHECK-NEXT: store i32 %uu, i32* %r, align 4
+; CHECK-NEXT: br label %if.end
+
+if.end:
+  call void @f()
+  br label %follow_1
+
+; CHECK: if.end:
+; CHECK-NEXT: %vv = phi i32 [ %vv.pre, %entry.if.end_crit_edge ], [ %uu, %if.then ]
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: ret i32 %vv
+
+follow_1:
+  br label %follow_2
+
+follow_2:
+  %vv = load i32, i32* %x, align 4
+  ret i32 %vv
+}
author	Max Kazantsev <max.kazantsev@azul.com>
	Tue, 31 Oct 2017 05:07:56 +0000 (05:07 +0000)
committer	Max Kazantsev <max.kazantsev@azul.com>
	Tue, 31 Oct 2017 05:07:56 +0000 (05:07 +0000)
include/llvm/Transforms/Scalar/GVN.h		patch \| blob \| history
lib/Transforms/Scalar/GVN.cpp		patch \| blob \| history
test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll		patch \| blob \| history
test/Transforms/GVN/PRE/pre-load-guards.ll	[new file with mode: 0644]	patch \| blob
test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll	[new file with mode: 0644]	patch \| blob
test/Transforms/GVN/PRE/pre-load.ll		patch \| blob \| history