From 057fc35f6fd7ddb9aa48e082ed42f205ccf13c98 Mon Sep 17 00:00:00 2001
From: Max Kazantsev
Date: Tue, 31 Oct 2017 05:07:56 +0000
Subject: [PATCH] Reapply "[GVN] Prevent LoadPRE from hoisting across
 instructions that don't pass control flow to successors"

This patch fixes the miscompile that happens when PRE hoists loads across
guards and other instructions that don't always pass control flow to their
successors. PRE is now prohibited from hoisting across such instructions
because there is no guarantee that a load placed after such an instruction is
still valid before it. For example, a load from under a guard may be invalid
before the guard in the following case:

  int array[LEN];
  ...
  guard(0 <= index && index < LEN);
  use(array[index]);

Differential Revision: https://reviews.llvm.org/D37460

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@316975 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Transforms/Scalar/GVN.h          |   7 +
 lib/Transforms/Scalar/GVN.cpp                 |  97 +++++++++++
 .../GVN/PRE/2017-10-16-LoadPRECrash.ll        |   7 +
 test/Transforms/GVN/PRE/pre-load-guards.ll    | 146 ++++++++++++++++
 .../GVN/PRE/pre-load-implicit-cf-updates.ll   | 118 +++++++++++++
 test/Transforms/GVN/PRE/pre-load.ll           | 158 ++++++++++++++++++
 6 files changed, 533 insertions(+)
 create mode 100644 test/Transforms/GVN/PRE/pre-load-guards.ll
 create mode 100644 test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll

diff --git a/include/llvm/Transforms/Scalar/GVN.h b/include/llvm/Transforms/Scalar/GVN.h
index 251492a4e5d..440d3f67c35 100644
--- a/include/llvm/Transforms/Scalar/GVN.h
+++ b/include/llvm/Transforms/Scalar/GVN.h
@@ -18,6 +18,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
@@ -27,6 +28,7 @@
 #include "llvm/IR/PassManager.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Transforms/Utils/OrderedInstructions.h"
 #include <cstdint>
 #include <utility>
 #include <vector>
@@ -156,7 +158,11 @@ private:
   AssumptionCache *AC;
   SetVector<BasicBlock *> DeadBlocks;
   OptimizationRemarkEmitter *ORE;
+  // Maps a block to the topmost instruction with implicit control flow in it.
+  DenseMap<const BasicBlock *, const Instruction *>
+      FirstImplicitControlFlowInsts;
+  OrderedInstructions *OI;
 
   ValueTable VN;
 
   /// A mapping from value numbers to lists of Value*'s that
@@ -268,6 +274,7 @@ private:
                                BasicBlock *Curr, unsigned int ValNo);
   Value *findLeader(const BasicBlock *BB, uint32_t num);
   void cleanupGlobalSets();
+  void fillImplicitControlFlowInfo(BasicBlock *BB);
   void verifyRemoved(const Instruction *I) const;
   bool splitCriticalEdges();
   BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ);
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 7e416368468..45b44368a3d 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -38,6 +38,7 @@
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/PHITransAddr.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CallSite.h"
@@ -1048,7 +1049,32 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
   // backwards through predecessors if needed.
   BasicBlock *LoadBB = LI->getParent();
   BasicBlock *TmpBB = LoadBB;
+  bool IsSafeToSpeculativelyExecute = isSafeToSpeculativelyExecute(LI);
+  // Check that there are no implicit control flow instructions above our load
+  // in its block. If there is an instruction that doesn't always pass
+  // execution to the following instruction, then moving the load across it
+  // may be invalid. For example:
+  //
+  // int arr[LEN];
+  // int index = ???;
+  // ...
+  // guard(0 <= index && index < LEN);
+  // use(arr[index]);
+  //
+  // It is illegal to move the array access to any point above the guard,
+  // because if the index is out of bounds we should deoptimize rather than
+  // access the array.
+  // Check that there is no guard in this block above our instruction.
+  if (!IsSafeToSpeculativelyExecute) {
+    auto It = FirstImplicitControlFlowInsts.find(TmpBB);
+    if (It != FirstImplicitControlFlowInsts.end()) {
+      assert(It->second->getParent() == TmpBB &&
+             "Implicit control flow map broken?");
+      if (OI->dominates(It->second, LI))
+        return false;
+    }
+  }
 
   while (TmpBB->getSinglePredecessor()) {
     TmpBB = TmpBB->getSinglePredecessor();
     if (TmpBB == LoadBB) // Infinite (unreachable) loop.
@@ -1063,6 +1089,11 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
     // which it was not previously executed.
     if (TmpBB->getTerminator()->getNumSuccessors() != 1)
       return false;
+
+    // Check that there is no implicit control flow in a block above.
+    if (!IsSafeToSpeculativelyExecute &&
+        FirstImplicitControlFlowInsts.count(TmpBB))
+      return false;
   }
 
   assert(TmpBB);
@@ -1982,6 +2013,8 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
   TLI = &RunTLI;
   VN.setAliasAnalysis(&RunAA);
   MD = RunMD;
+  OrderedInstructions OrderedInstrs(DT);
+  OI = &OrderedInstrs;
   VN.setMemDep(MD);
   ORE = RunORE;
 
@@ -2064,14 +2097,26 @@ bool GVN::processBlock(BasicBlock *BB) {
     if (!AtStart)
       --BI;
 
+    bool InvalidateImplicitCF = false;
+    const Instruction *MaybeFirstICF = FirstImplicitControlFlowInsts.lookup(BB);
     for (auto *I : InstrsToErase) {
       assert(I->getParent() == BB && "Removing instruction from wrong block?");
       DEBUG(dbgs() << "GVN removed: " << *I << '\n');
       if (MD) MD->removeInstruction(I);
       DEBUG(verifyRemoved(I));
+      if (MaybeFirstICF == I) {
+        // We erased the block's first ICF instruction; the map must be updated.
+        InvalidateImplicitCF = true;
+        // Do not keep a dangling pointer to the erased instruction.
+        MaybeFirstICF = nullptr;
+      }
       I->eraseFromParent();
     }
+
+    OI->invalidateBlock(BB);
     InstrsToErase.clear();
+    if (InvalidateImplicitCF)
+      fillImplicitControlFlowInfo(BB);
 
     if (AtStart)
       BI = BB->begin();
@@ -2265,7 +2310,14 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
   if (MD)
     MD->removeInstruction(CurInst);
   DEBUG(verifyRemoved(CurInst));
+  bool InvalidateImplicitCF =
+      FirstImplicitControlFlowInsts.lookup(CurInst->getParent()) == CurInst;
+  // FIXME: Intended to be markInstructionForDeletion(CurInst), but it causes
+  // some assertion failures.
+  OI->invalidateBlock(CurrentBlock);
   CurInst->eraseFromParent();
+  if (InvalidateImplicitCF)
+    fillImplicitControlFlowInfo(CurrentBlock);
 
   ++NumGVNInstr;
   return true;
@@ -2332,6 +2384,9 @@ bool GVN::iterateOnFunction(Function &F) {
   // RPOT walks the graph in its constructor and will not be invalidated during
   // processBlock.
   ReversePostOrderTraversal<Function *> RPOT(&F);
+
+  for (BasicBlock *BB : RPOT)
+    fillImplicitControlFlowInfo(BB);
   for (BasicBlock *BB : RPOT)
     Changed |= processBlock(BB);
 
@@ -2343,6 +2398,48 @@ void GVN::cleanupGlobalSets() {
   LeaderTable.clear();
   BlockRPONumber.clear();
   TableAllocator.Reset();
+  FirstImplicitControlFlowInsts.clear();
+}
+
+void
+GVN::fillImplicitControlFlowInfo(BasicBlock *BB) {
+  // Make sure that all marked instructions are actually deleted by this point,
+  // so that we don't need to care about omitting them.
+  assert(InstrsToErase.empty() && "Filling before removing all marked insns?");
+  auto MayNotTransferExecutionToSuccessor = [&](const Instruction *I) {
+    // If an instruction in a block doesn't always pass control to its
+    // successor instruction, mark the block as having implicit control flow.
+    // We use this to avoid wrong assumptions of the sort "if A is executed and
+    // B post-dominates A, then B is also executed". This is not true if there
+    // is an implicit control flow instruction (e.g. a guard) between them.
+    //
+    // TODO: Currently, isGuaranteedToTransferExecutionToSuccessor returns false
+    // for volatile stores and loads because they can trap. The discussion on
+    // whether or not it is correct is still ongoing. We might want to get rid
+    // of this logic in the future. Anyway, trapping instructions shouldn't
+    // introduce implicit control flow, so we explicitly allow them here. This
+    // must be removed once isGuaranteedToTransferExecutionToSuccessor is fixed.
+    if (isGuaranteedToTransferExecutionToSuccessor(I))
+      return false;
+    if (auto *LI = dyn_cast<LoadInst>(I)) {
+      assert(LI->isVolatile() && "Non-volatile load should transfer execution"
+                                 " to successor!");
+      return false;
+    }
+    if (auto *SI = dyn_cast<StoreInst>(I)) {
+      assert(SI->isVolatile() && "Non-volatile store should transfer execution"
+                                 " to successor!");
+      return false;
+    }
+    return true;
+  };
+  FirstImplicitControlFlowInsts.erase(BB);
+
+  for (auto &I : *BB)
+    if (MayNotTransferExecutionToSuccessor(&I)) {
+      FirstImplicitControlFlowInsts[BB] = &I;
+      break;
+    }
 }
 
 /// Verify that the specified instruction does not occur in our
diff --git a/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll b/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll
index 5fbb0fcc511..14f65a4a24a 100644
--- a/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll
+++ b/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll
@@ -17,6 +17,13 @@ declare i64* @getaddr_i64(i64 addrspace(100)*) #0
 
 define hidden void @wrapon_fn173() {
 ; CHECK-LABEL: @wrapon_fn173
+; CHECK: entry:
+; CHECK-NEXT: call %ArrayImpl* @getaddr_ArrayImpl(%ArrayImpl addrspace(100)* undef)
+; CHECK-NEXT: %.pre = load i64 addrspace(100)*, i64 addrspace(100)** null, align 8
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: call i64* @getaddr_i64(i64 addrspace(100)* %.pre)
+; CHECK-NEXT: br label %loop
 entry:
   %0 = call %ArrayImpl* @getaddr_ArrayImpl(%ArrayImpl addrspace(100)* undef)
diff --git a/test/Transforms/GVN/PRE/pre-load-guards.ll b/test/Transforms/GVN/PRE/pre-load-guards.ll
new file mode 100644
index 00000000000..eeb3ef649d9
--- /dev/null
+++ b/test/Transforms/GVN/PRE/pre-load-guards.ll
@@ -0,0 +1,146 @@
+; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+declare void @llvm.experimental.guard(i1, ...)
+
+; This is a motivating example of why we prohibit hoisting through guards.
+; In the bottom block, we check that the index is within bounds, access the
+; element only in that case, and deoptimize otherwise. If we hoist the load to
+; a place above the guard, it may lead to an out-of-bounds array access.
+define i32 @test_motivation(i32* %p, i32* %q, i1 %C, i32 %index, i32 %len) {
+; CHECK-LABEL: @test_motivation(
+block1:
+  %el1 = getelementptr inbounds i32, i32* %q, i32 %index
+  %el2 = getelementptr inbounds i32, i32* %p, i32 %index
+  br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK: block2:
+; CHECK-NEXT: br
+; CHECK-NOT: load
+; CHECK-NOT: sge
+; CHECK-NOT: slt
+; CHECK-NOT: and
+  br label %block4
+
+block3:
+  store i32 0, i32* %el1
+  br label %block4
+
+block4:
+
+; CHECK: block4:
+; CHECK: %cond1 = icmp sge i32 %index, 0
+; CHECK-NEXT: %cond2 = icmp slt i32 %index, %len
+; CHECK-NEXT: %in.bounds = and i1 %cond1, %cond2
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %in.bounds)
+; CHECK-NEXT: %PRE = load i32, i32* %P2
+; CHECK: ret i32 %PRE
+
+  %P2 = phi i32* [%el2, %block3], [%el1, %block2]
+  %cond1 = icmp sge i32 %index, 0
+  %cond2 = icmp slt i32 %index, %len
+  %in.bounds = and i1 %cond1, %cond2
+  call void (i1, ...) @llvm.experimental.guard(i1 %in.bounds) [ "deopt"() ]
+  %PRE = load i32, i32* %P2
+  ret i32 %PRE
+}
+
+; A guard in the load's block that is above the load should prohibit PRE.
+define i32 @test_guard_01(i32* %p, i32* %q, i1 %C, i1 %G) {
+; CHECK-LABEL: @test_guard_01(
+block1:
+  br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK: block2:
+; CHECK-NEXT: br
+; CHECK-NOT: load
+
+  br label %block4
+
+block3:
+  store i32 0, i32* %p
+  br label %block4
+
+block4:
+
+; CHECK: block4:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %G)
+; CHECK-NEXT: load
+; CHECK: ret i32
+
+  %P2 = phi i32* [%p, %block3], [%q, %block2]
+  call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
+  %PRE = load i32, i32* %P2
+  ret i32 %PRE
+}
+
+; A guard in the load's block that is below the load should not prohibit PRE.
+define i32 @test_guard_02(i32* %p, i32* %q, i1 %C, i1 %G) {
+; CHECK-LABEL: @test_guard_02(
+block1:
+  br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK: block2:
+; CHECK-NEXT: load i32, i32* %q
+
+  br label %block4
+
+block3:
+  store i32 0, i32* %p
+  br label %block4
+
+block4:
+
+; CHECK: block4:
+; CHECK-NEXT: phi i32 [
+; CHECK-NEXT: phi i32* [
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %G)
+; CHECK-NOT: load
+; CHECK: ret i32
+
+  %P2 = phi i32* [%p, %block3], [%q, %block2]
+  %PRE = load i32, i32* %P2
+  call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
+  ret i32 %PRE
+}
+
+; A guard above the load's block should prevent PRE from hoisting through it.
+define i32 @test_guard_03(i32* %p, i32* %q, i1 %C, i1 %G) {
+; CHECK-LABEL: @test_guard_03(
+block1:
+  br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK: block2:
+; CHECK-NEXT: br
+; CHECK-NOT: load
+
+  br label %block4
+
+block3:
+  store i32 0, i32* %p
+  br label %block4
+
+block4:
+
+; CHECK: block4:
+; CHECK-NEXT: phi i32*
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %G)
+; CHECK-NEXT: load
+; CHECK-NEXT: ret i32
+
+  %P2 = phi i32* [%p, %block3], [%q, %block2]
+  call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
+  br label %block5
+
+block5:
+  %PRE = load i32, i32* %P2
+  ret i32 %PRE
+}
diff --git a/test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll b/test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll
new file mode 100644
index 00000000000..c131e923997
--- /dev/null
+++ b/test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll
@@ -0,0 +1,118 @@
+; RUN: opt -S -gvn -enable-load-pre < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; These tests exercise situations in which instructions that were the first
+; implicit control flow instructions in their blocks get removed. We make sure
+; that after that we don't crash and are still able to perform PRE correctly.
+
+declare i32 @foo(i32 %arg) #0
+
+define hidden void @test_01(i32 %x, i32 %y) {
+
+; c2 only throws if c1 throws, so it can be safely removed and then PRE can
+; hoist the load out of the loop.
+
+; CHECK-LABEL: @test_01
+; CHECK: entry:
+; CHECK-NEXT: %c1 = call i32 @foo(i32 %x)
+; CHECK-NEXT: %val.pre = load i32, i32* null, align 8
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: %c3 = call i32 @foo(i32 %val.pre)
+; CHECK-NEXT: br label %loop
+
+entry:
+  %c1 = call i32 @foo(i32 %x)
+  br label %loop
+
+loop:
+  %c2 = call i32 @foo(i32 %x)
+  %val = load i32, i32* null, align 8
+  %c3 = call i32 @foo(i32 %val)
+  br label %loop
+}
+
+define hidden void @test_02(i32 %x, i32 %y) {
+
+; PRE is not allowed because c2 may throw.
+
+; CHECK-LABEL: @test_02
+; CHECK: entry:
+; CHECK-NEXT: %c1 = call i32 @foo(i32 %x)
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: %c2 = call i32 @foo(i32 %y)
+; CHECK-NEXT: %val = load i32, i32* null, align 8
+; CHECK-NEXT: %c3 = call i32 @foo(i32 %val)
+; CHECK-NEXT: br label %loop
+
+entry:
+  %c1 = call i32 @foo(i32 %x)
+  br label %loop
+
+loop:
+  %c2 = call i32 @foo(i32 %y)
+  %val = load i32, i32* null, align 8
+  %c3 = call i32 @foo(i32 %val)
+  br label %loop
+}
+
+define hidden void @test_03(i32 %x, i32 %y) {
+
+; PRE of the load is allowed because c2 only throws if c1 throws. c3 should
+; not be eliminated. c4 is eliminated because it only throws if c3 throws.
+
+; CHECK-LABEL: @test_03
+; CHECK: entry:
+; CHECK-NEXT: %c1 = call i32 @foo(i32 %x)
+; CHECK-NEXT: %val.pre = load i32, i32* null, align 8
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: %c3 = call i32 @foo(i32 %y)
+; CHECK-NEXT: %c5 = call i32 @foo(i32 %val.pre)
+; CHECK-NEXT: br label %loop
+
+entry:
+  %c1 = call i32 @foo(i32 %x)
+  br label %loop
+
+loop:
+  %c2 = call i32 @foo(i32 %x)
+  %val = load i32, i32* null, align 8
+  %c3 = call i32 @foo(i32 %y)
+  %val2 = load i32, i32* null, align 8
+  %c4 = call i32 @foo(i32 %y)
+  %c5 = call i32 @foo(i32 %val)
+  br label %loop
+}
+
+define hidden void @test_04(i32 %x, i32 %y) {
+
+; PRE is not allowed even after we remove c2, because c3 now prevents it.
+
+; CHECK-LABEL: @test_04
+; CHECK: entry:
+; CHECK-NEXT: %c1 = call i32 @foo(i32 %x)
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: %c3 = call i32 @foo(i32 %y)
+; CHECK-NEXT: %val = load i32, i32* null, align 8
+; CHECK-NEXT: %c5 = call i32 @foo(i32 %val)
+; CHECK-NEXT: br label %loop
+
+entry:
+  %c1 = call i32 @foo(i32 %x)
+  br label %loop
+
+loop:
+  %c2 = call i32 @foo(i32 %x)
+  %c3 = call i32 @foo(i32 %y)
+  %val = load i32, i32* null, align 8
+  %c4 = call i32 @foo(i32 %y)
+  %c5 = call i32 @foo(i32 %val)
+  br label %loop
+}
+
+attributes #0 = { readnone }
diff --git a/test/Transforms/GVN/PRE/pre-load.ll b/test/Transforms/GVN/PRE/pre-load.ll
index ffff2b7f08e..1d050f4b620 100644
--- a/test/Transforms/GVN/PRE/pre-load.ll
+++ b/test/Transforms/GVN/PRE/pre-load.ll
@@ -430,3 +430,161 @@ cleanup2:
   call void @g(i32 %NOTPRE)
   cleanupret from %c2 unwind to caller
 }
+
+; Don't PRE a load across potentially throwing calls.
+
+define i32 @test13(i32* noalias nocapture readonly %x, i32* noalias nocapture %r, i32 %a) {
+
+; CHECK-LABEL: @test13(
+; CHECK: entry:
+; CHECK-NEXT: icmp eq
+; CHECK-NEXT: br i1
+
+entry:
+  %tobool = icmp eq i32 %a, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: if.then:
+; CHECK-NEXT: load i32
+; CHECK-NEXT: store i32
+
+if.then:
+  %uu = load i32, i32* %x, align 4
+  store i32 %uu, i32* %r, align 4
+  br label %if.end
+
+; CHECK: if.end:
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: load i32
+
+if.end:
+  call void @f()
+  %vv = load i32, i32* %x, align 4
+  ret i32 %vv
+}
+
+; Same as test13, but now the blocking function is not immediately in the
+; load's block.
+
+define i32 @test14(i32* noalias nocapture readonly %x, i32* noalias nocapture %r, i32 %a) {
+
+; CHECK-LABEL: @test14(
+; CHECK: entry:
+; CHECK-NEXT: icmp eq
+; CHECK-NEXT: br i1
+
+entry:
+  %tobool = icmp eq i32 %a, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: if.then:
+; CHECK-NEXT: load i32
+; CHECK-NEXT: store i32
+
+if.then:
+  %uu = load i32, i32* %x, align 4
+  store i32 %uu, i32* %r, align 4
+  br label %if.end
+
+; CHECK: if.end:
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: load i32
+
+if.end:
+  call void @f()
+  br label %follow_1
+
+follow_1:
+  br label %follow_2
+
+follow_2:
+  %vv = load i32, i32* %x, align 4
+  ret i32 %vv
+}
+
+; Same as test13, but %x here is dereferenceable. A pointer that is
+; dereferenceable can be loaded from speculatively without a risk of trapping.
+; Since it is OK to speculate, PRE is allowed.
+
+define i32 @test15(i32* noalias nocapture readonly dereferenceable(8) %x, i32* noalias nocapture %r, i32 %a) {
+
+; CHECK-LABEL: @test15
+; CHECK: entry:
+; CHECK-NEXT: icmp eq
+; CHECK-NEXT: br i1
+
+entry:
+  %tobool = icmp eq i32 %a, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: entry.if.end_crit_edge:
+; CHECK-NEXT: %vv.pre = load i32, i32* %x, align 4
+; CHECK-NEXT: br label %if.end
+
+if.then:
+  %uu = load i32, i32* %x, align 4
+  store i32 %uu, i32* %r, align 4
+  br label %if.end
+
+; CHECK: if.then:
+; CHECK-NEXT: %uu = load i32, i32* %x, align 4
+; CHECK-NEXT: store i32 %uu, i32* %r, align 4
+; CHECK-NEXT: br label %if.end
+
+if.end:
+  call void @f()
+  %vv = load i32, i32* %x, align 4
+  ret i32 %vv
+
+; CHECK: if.end:
+; CHECK-NEXT: %vv = phi i32 [ %vv.pre, %entry.if.end_crit_edge ], [ %uu, %if.then ]
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: ret i32 %vv
+
+}
+
+; Same as test14, but %x here is dereferenceable. A pointer that is
+; dereferenceable can be loaded from speculatively without a risk of trapping.
+; Since it is OK to speculate, PRE is allowed.
+
+define i32 @test16(i32* noalias nocapture readonly dereferenceable(8) %x, i32* noalias nocapture %r, i32 %a) {
+
+; CHECK-LABEL: @test16(
+; CHECK: entry:
+; CHECK-NEXT: icmp eq
+; CHECK-NEXT: br i1
+
+entry:
+  %tobool = icmp eq i32 %a, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: entry.if.end_crit_edge:
+; CHECK-NEXT: %vv.pre = load i32, i32* %x, align 4
+; CHECK-NEXT: br label %if.end
+
+if.then:
+  %uu = load i32, i32* %x, align 4
+  store i32 %uu, i32* %r, align 4
+  br label %if.end
+
+; CHECK: if.then:
+; CHECK-NEXT: %uu = load i32, i32* %x, align 4
+; CHECK-NEXT: store i32 %uu, i32* %r, align 4
+; CHECK-NEXT: br label %if.end
+
+if.end:
+  call void @f()
+  br label %follow_1
+
+; CHECK: if.end:
+; CHECK-NEXT: %vv = phi i32 [ %vv.pre, %entry.if.end_crit_edge ], [ %uu, %if.then ]
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: ret i32 %vv
+
+follow_1:
+  br label %follow_2
+
+follow_2:
+  %vv = load i32, i32* %x, align 4
+  ret i32 %vv
+}
-- 
2.40.0
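Note (not part of the commit): below is a minimal standalone C++ sketch of the mechanism this patch adds, for readers who want the idea without the full diff. The helper names findFirstImplicitControlFlow and blockedByImplicitCF are made up for illustration, and Instruction::comesBefore (an API added to LLVM after this commit) stands in for the patch's OrderedInstructions::dominates intra-block query; isGuaranteedToTransferExecutionToSuccessor is the same ValueTracking helper the patch calls.

// Illustrative sketch only; not the committed implementation.
#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Scan BB top-down for the first instruction that may not transfer execution
// to its successor (e.g. a guard intrinsic or a call that may throw).
// Returns nullptr if every instruction always falls through.
static const Instruction *findFirstImplicitControlFlow(const BasicBlock &BB) {
  for (const Instruction &I : BB) {
    if (isGuaranteedToTransferExecutionToSuccessor(&I))
      continue;
    // Volatile loads/stores fail the check above only because they can trap;
    // trapping is not implicit control flow, so skip them, mirroring the
    // special case in fillImplicitControlFlowInfo.
    if (isa<LoadInst>(&I) || isa<StoreInst>(&I))
      continue;
    return &I;
  }
  return nullptr;
}

// LoadPRE must give up when implicit control flow sits above the load in its
// own block: hoisting the load would execute it on paths where the guard (or
// throwing call) used to run first.
static bool blockedByImplicitCF(
    const DenseMap<const BasicBlock *, const Instruction *> &FirstICF,
    const LoadInst *LI) {
  auto It = FirstICF.find(LI->getParent());
  if (It == FirstICF.end())
    return false; // No implicit control flow anywhere in this block.
  // Instruction::comesBefore stands in for the patch's
  // OrderedInstructions::dominates query within a single block.
  return It->second->comesBefore(LI);
}

In the patch itself this information is cached per block by fillImplicitControlFlowInfo and recomputed whenever GVN erases a block's first implicit-control-flow instruction; the pre-load-implicit-cf-updates.ll tests above exercise exactly those invalidations.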