#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Transforms/Utils/OrderedInstructions.h"
#include <cstdint>
#include <utility>
#include <vector>
AssumptionCache *AC;
SetVector<BasicBlock *> DeadBlocks;
OptimizationRemarkEmitter *ORE;
+ // Maps a block to the topmost instruction with implicit control flow in it.
+ DenseMap<const BasicBlock *, const Instruction *>
+ FirstImplicitControlFlowInsts;
+ OrderedInstructions *OI;
ValueTable VN;
/// A mapping from value numbers to lists of Value*'s that
BasicBlock *Curr, unsigned int ValNo);
Value *findLeader(const BasicBlock *BB, uint32_t num);
void cleanupGlobalSets();
+ void fillImplicitControlFlowInfo(BasicBlock *BB);
void verifyRemoved(const Instruction *I) const;
bool splitCriticalEdges();
BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ);
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
// backwards through predecessors if needed.
BasicBlock *LoadBB = LI->getParent();
BasicBlock *TmpBB = LoadBB;
+ bool IsSafeToSpeculativelyExecute = isSafeToSpeculativelyExecute(LI);
+ // Check that there are no implicit control flow instructions above our load in
+ // its block. If there is an instruction that doesn't always pass the
+ // execution to the following instruction, then moving through it may become
+ // invalid. For example:
+ //
+ // int arr[LEN];
+ // int index = ???;
+ // ...
+ // guard(0 <= index && index < LEN);
+ // use(arr[index]);
+ //
+ // It is illegal to move the array access to any point above the guard,
+ // because if the index is out of bounds we should deoptimize rather than
+ // access the array.
+ // Check that there is no guard in this block above our instruction.
+ if (!IsSafeToSpeculativelyExecute) {
+ auto It = FirstImplicitControlFlowInsts.find(TmpBB);
+ if (It != FirstImplicitControlFlowInsts.end()) {
+ assert(It->second->getParent() == TmpBB &&
+ "Implicit control flow map broken?");
+ if (OI->dominates(It->second, LI))
+ return false;
+ }
+ }
while (TmpBB->getSinglePredecessor()) {
TmpBB = TmpBB->getSinglePredecessor();
if (TmpBB == LoadBB) // Infinite (unreachable) loop.
// which it was not previously executed.
if (TmpBB->getTerminator()->getNumSuccessors() != 1)
return false;
+
+ // Check that there is no implicit control flow in a block above.
+ if (!IsSafeToSpeculativelyExecute &&
+ FirstImplicitControlFlowInsts.count(TmpBB))
+ return false;
}
assert(TmpBB);
TLI = &RunTLI;
VN.setAliasAnalysis(&RunAA);
MD = RunMD;
+ OrderedInstructions OrderedInstrs(DT);
+ OI = &OrderedInstrs;
VN.setMemDep(MD);
ORE = RunORE;
if (!AtStart)
--BI;
+ bool InvalidateImplicitCF = false;
+ const Instruction *MaybeFirstICF = FirstImplicitControlFlowInsts.lookup(BB);
for (auto *I : InstrsToErase) {
assert(I->getParent() == BB && "Removing instruction from wrong block?");
DEBUG(dbgs() << "GVN removed: " << *I << '\n');
if (MD) MD->removeInstruction(I);
DEBUG(verifyRemoved(I));
+ if (MaybeFirstICF == I) {
+ // We have erased the first ICF in block. The map needs to be updated.
+ InvalidateImplicitCF = true;
+ // Do not keep dangling pointer on the erased instruction.
+ MaybeFirstICF = nullptr;
+ }
I->eraseFromParent();
}
+
+ OI->invalidateBlock(BB);
InstrsToErase.clear();
+ if (InvalidateImplicitCF)
+ fillImplicitControlFlowInfo(BB);
if (AtStart)
BI = BB->begin();
if (MD)
MD->removeInstruction(CurInst);
DEBUG(verifyRemoved(CurInst));
+ bool InvalidateImplicitCF =
+ FirstImplicitControlFlowInsts.lookup(CurInst->getParent()) == CurInst;
+ // FIXME: Intended to be markInstructionForDeletion(CurInst), but it causes
+ // some assertion failures.
+ OI->invalidateBlock(CurrentBlock);
CurInst->eraseFromParent();
+ if (InvalidateImplicitCF)
+ fillImplicitControlFlowInfo(CurrentBlock);
++NumGVNInstr;
return true;
// RPOT walks the graph in its constructor and will not be invalidated during
// processBlock.
ReversePostOrderTraversal<Function *> RPOT(&F);
+
+ for (BasicBlock *BB : RPOT)
+ fillImplicitControlFlowInfo(BB);
for (BasicBlock *BB : RPOT)
Changed |= processBlock(BB);
LeaderTable.clear();
BlockRPONumber.clear();
TableAllocator.Reset();
+ FirstImplicitControlFlowInsts.clear();
+}
+
+// Scan \p BB from top to bottom and record in FirstImplicitControlFlowInsts
+// the first instruction (if any) that is not guaranteed to transfer execution
+// to its successor instruction (e.g. a guard intrinsic or a call that may
+// throw). Any previously recorded entry for \p BB is dropped first, so this
+// can also be used to refresh the map after instructions were erased.
+void
+GVN::fillImplicitControlFlowInfo(BasicBlock *BB) {
+  // Make sure that all marked instructions are actually deleted by this point,
+  // so that we don't need to care about omitting them.
+  assert(InstrsToErase.empty() && "Filling before removed all marked insns?");
+  auto MayNotTransferExecutionToSuccessor = [&](const Instruction *I) {
+    // If a block's instruction doesn't always pass the control to its
+    // successor instruction, mark the block as having implicit control flow.
+    // We use them to avoid wrong assumptions of the sort "if A is executed and
+    // B post-dominates A, then B is also executed". This is not true if there
+    // is an implicit control flow instruction (e.g. a guard) between them.
+    //
+    // TODO: Currently, isGuaranteedToTransferExecutionToSuccessor returns false
+    // for volatile stores and loads because they can trap. The discussion on
+    // whether or not it is correct is still ongoing. We might want to get rid
+    // of this logic in the future. Anyways, trapping instructions shouldn't
+    // introduce implicit control flow, so we explicitly allow them here. This
+    // must be removed once isGuaranteedToTransferExecutionToSuccessor is fixed.
+    if (isGuaranteedToTransferExecutionToSuccessor(I))
+      return false;
+    if (auto *LI = dyn_cast<LoadInst>(I)) {
+      assert(LI->isVolatile() && "Non-volatile load should transfer execution"
+             " to successor!");
+      return false;
+    }
+    if (auto *SI = dyn_cast<StoreInst>(I)) {
+      assert(SI->isVolatile() && "Non-volatile store should transfer execution"
+             " to successor!");
+      return false;
+    }
+    return true;
+  };
+  FirstImplicitControlFlowInsts.erase(BB);
+
+  // Only the topmost such instruction is recorded: everything below it in the
+  // block is already dominated by it, which is all the clients need to know.
+  for (auto &I : *BB)
+    if (MayNotTransferExecutionToSuccessor(&I)) {
+      FirstImplicitControlFlowInsts[BB] = &I;
+      break;
+    }
 }
/// Verify that the specified instruction does not occur in our
define hidden void @wrapon_fn173() {
; CHECK-LABEL: @wrapon_fn173
+; CHECK: entry:
+; CHECK-NEXT: call %ArrayImpl* @getaddr_ArrayImpl(%ArrayImpl addrspace(100)* undef)
+; CHECK-NEXT: %.pre = load i64 addrspace(100)*, i64 addrspace(100)** null, align 8
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: call i64* @getaddr_i64(i64 addrspace(100)* %.pre)
+; CHECK-NEXT: br label %loop
entry:
%0 = call %ArrayImpl* @getaddr_ArrayImpl(%ArrayImpl addrspace(100)* undef)
--- /dev/null
+; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+declare void @llvm.experimental.guard(i1, ...)
+
+; This is a motivating example on why we prohibit hoisting through guards.
+; In the bottom block, we check that the index is within bounds and only access
+; the element in this case and deoptimize otherwise. If we hoist the load to a
+; place above the guard, it may lead to an out-of-bounds array access.
+define i32 @test_motivation(i32* %p, i32* %q, i1 %C, i32 %index, i32 %len) {
+; CHECK-LABEL: @test_motivation(
+block1:
+ %el1 = getelementptr inbounds i32, i32* %q, i32 %index
+ %el2 = getelementptr inbounds i32, i32* %p, i32 %index
+ br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK: block2:
+; CHECK-NEXT: br
+; CHECK-NOT: load
+; CHECK-NOT: sge
+; CHECK-NOT: slt
+; CHECK-NOT: and
+ br label %block4
+
+block3:
+ store i32 0, i32* %el1
+ br label %block4
+
+block4:
+
+; CHECK: block4:
+; CHECK: %cond1 = icmp sge i32 %index, 0
+; CHECK-NEXT: %cond2 = icmp slt i32 %index, %len
+; CHECK-NEXT: %in.bounds = and i1 %cond1, %cond2
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %in.bounds)
+; CHECK-NEXT: %PRE = load i32, i32* %P2
+; CHECK: ret i32 %PRE
+
+ %P2 = phi i32* [%el2, %block3], [%el1, %block2]
+ %cond1 = icmp sge i32 %index, 0
+ %cond2 = icmp slt i32 %index, %len
+ %in.bounds = and i1 %cond1, %cond2
+ call void (i1, ...) @llvm.experimental.guard(i1 %in.bounds) [ "deopt"() ]
+ %PRE = load i32, i32* %P2
+ ret i32 %PRE
+}
+
+; Guard in load's block that is above the load should prohibit the PRE.
+define i32 @test_guard_01(i32* %p, i32* %q, i1 %C, i1 %G) {
+; CHECK-LABEL: @test_guard_01(
+block1:
+ br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK: block2:
+; CHECK-NEXT: br
+; CHECK-NOT: load
+
+ br label %block4
+
+block3:
+ store i32 0, i32* %p
+ br label %block4
+
+block4:
+
+; CHECK: block4:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %G)
+; CHECK-NEXT: load
+; CHECK: ret i32
+
+ %P2 = phi i32* [%p, %block3], [%q, %block2]
+ call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
+ %PRE = load i32, i32* %P2
+ ret i32 %PRE
+}
+
+; Guard in load's block that is below the load should not prohibit the PRE.
+define i32 @test_guard_02(i32* %p, i32* %q, i1 %C, i1 %G) {
+; CHECK-LABEL: @test_guard_02(
+block1:
+ br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK: block2:
+; CHECK-NEXT: load i32, i32* %q
+
+ br label %block4
+
+block3:
+ store i32 0, i32* %p
+ br label %block4
+
+block4:
+
+; CHECK: block4:
+; CHECK-NEXT: phi i32 [
+; CHECK-NEXT: phi i32* [
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %G)
+; CHECK-NOT: load
+; CHECK: ret i32
+
+ %P2 = phi i32* [%p, %block3], [%q, %block2]
+ %PRE = load i32, i32* %P2
+ call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
+ ret i32 %PRE
+}
+
+; Guard above the load's block should prevent PRE from hoisting through it.
+define i32 @test_guard_03(i32* %p, i32* %q, i1 %C, i1 %G) {
+; CHECK-LABEL: @test_guard_03(
+block1:
+ br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK: block2:
+; CHECK-NEXT: br
+; CHECK-NOT: load
+
+ br label %block4
+
+block3:
+ store i32 0, i32* %p
+ br label %block4
+
+block4:
+
+; CHECK: block4:
+; CHECK-NEXT: phi i32*
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %G)
+; CHECK-NEXT: load
+; CHECK-NEXT: ret i32
+
+ %P2 = phi i32* [%p, %block3], [%q, %block2]
+ call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
+ br label %block5
+
+block5:
+ %PRE = load i32, i32* %P2
+ ret i32 %PRE
+}
--- /dev/null
+; RUN: opt -S -gvn -enable-load-pre < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; These tests exercise situations when instructions that were first instructions
+; with implicit control flow get removed. We make sure that after that we don't
+; face crashes and are still able to perform PRE correctly.
+
+declare i32 @foo(i32 %arg) #0
+
+define hidden void @test_01(i32 %x, i32 %y) {
+
+; c2 only throws if c1 throws, so it can be safely removed and then PRE can
+; hoist the load out of loop.
+
+; CHECK-LABEL: @test_01
+; CHECK: entry:
+; CHECK-NEXT: %c1 = call i32 @foo(i32 %x)
+; CHECK-NEXT: %val.pre = load i32, i32* null, align 8
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: %c3 = call i32 @foo(i32 %val.pre)
+; CHECK-NEXT: br label %loop
+
+entry:
+ %c1 = call i32 @foo(i32 %x)
+ br label %loop
+
+loop:
+ %c2 = call i32 @foo(i32 %x)
+ %val = load i32, i32* null, align 8
+ %c3 = call i32 @foo(i32 %val)
+ br label %loop
+}
+
+define hidden void @test_02(i32 %x, i32 %y) {
+
+; PRE is not allowed because c2 may throw.
+
+; CHECK-LABEL: @test_02
+; CHECK: entry:
+; CHECK-NEXT: %c1 = call i32 @foo(i32 %x)
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: %c2 = call i32 @foo(i32 %y)
+; CHECK-NEXT: %val = load i32, i32* null, align 8
+; CHECK-NEXT: %c3 = call i32 @foo(i32 %val)
+; CHECK-NEXT: br label %loop
+
+entry:
+ %c1 = call i32 @foo(i32 %x)
+ br label %loop
+
+loop:
+ %c2 = call i32 @foo(i32 %y)
+ %val = load i32, i32* null, align 8
+ %c3 = call i32 @foo(i32 %val)
+ br label %loop
+}
+
+define hidden void @test_03(i32 %x, i32 %y) {
+
+; PRE of load is allowed because c2 only throws if c1 throws. c3 should
+; not be eliminated. c4 is eliminated because it only throws if c3 throws.
+
+; CHECK-LABEL: @test_03
+; CHECK: entry:
+; CHECK-NEXT: %c1 = call i32 @foo(i32 %x)
+; CHECK-NEXT: %val.pre = load i32, i32* null, align 8
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: %c3 = call i32 @foo(i32 %y)
+; CHECK-NEXT: %c5 = call i32 @foo(i32 %val.pre)
+; CHECK-NEXT: br label %loop
+
+entry:
+ %c1 = call i32 @foo(i32 %x)
+ br label %loop
+
+loop:
+ %c2 = call i32 @foo(i32 %x)
+ %val = load i32, i32* null, align 8
+ %c3 = call i32 @foo(i32 %y)
+ %val2 = load i32, i32* null, align 8
+ %c4 = call i32 @foo(i32 %y)
+ %c5 = call i32 @foo(i32 %val)
+ br label %loop
+}
+
+define hidden void @test_04(i32 %x, i32 %y) {
+
+; PRE is not allowed even after we remove c2 because now c3 prevents us from it.
+
+; CHECK-LABEL: @test_04
+; CHECK: entry:
+; CHECK-NEXT: %c1 = call i32 @foo(i32 %x)
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: %c3 = call i32 @foo(i32 %y)
+; CHECK-NEXT: %val = load i32, i32* null, align 8
+; CHECK-NEXT: %c5 = call i32 @foo(i32 %val)
+; CHECK-NEXT: br label %loop
+
+entry:
+ %c1 = call i32 @foo(i32 %x)
+ br label %loop
+
+loop:
+ %c2 = call i32 @foo(i32 %x)
+ %c3 = call i32 @foo(i32 %y)
+ %val = load i32, i32* null, align 8
+ %c4 = call i32 @foo(i32 %y)
+ %c5 = call i32 @foo(i32 %val)
+ br label %loop
+}
+
+attributes #0 = { readnone }
call void @g(i32 %NOTPRE)
cleanupret from %c2 unwind to caller
}
+
+; Don't PRE load across potentially throwing calls.
+
+define i32 @test13(i32* noalias nocapture readonly %x, i32* noalias nocapture %r, i32 %a) {
+
+; CHECK-LABEL: @test13(
+; CHECK: entry:
+; CHECK-NEXT: icmp eq
+; CHECK-NEXT: br i1
+
+entry:
+ %tobool = icmp eq i32 %a, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: if.then:
+; CHECK-NEXT: load i32
+; CHECK-NEXT: store i32
+
+if.then:
+ %uu = load i32, i32* %x, align 4
+ store i32 %uu, i32* %r, align 4
+ br label %if.end
+
+; CHECK: if.end:
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: load i32
+
+if.end:
+ call void @f()
+ %vv = load i32, i32* %x, align 4
+ ret i32 %vv
+}
+
+; Same as test13, but now the blocking function is not immediately in load's
+; block.
+
+define i32 @test14(i32* noalias nocapture readonly %x, i32* noalias nocapture %r, i32 %a) {
+
+; CHECK-LABEL: @test14(
+; CHECK: entry:
+; CHECK-NEXT: icmp eq
+; CHECK-NEXT: br i1
+
+entry:
+ %tobool = icmp eq i32 %a, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: if.then:
+; CHECK-NEXT: load i32
+; CHECK-NEXT: store i32
+
+if.then:
+ %uu = load i32, i32* %x, align 4
+ store i32 %uu, i32* %r, align 4
+ br label %if.end
+
+; CHECK: if.end:
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: load i32
+
+if.end:
+ call void @f()
+ br label %follow_1
+
+follow_1:
+ br label %follow_2
+
+follow_2:
+ %vv = load i32, i32* %x, align 4
+ ret i32 %vv
+}
+
+; Same as test13, but %x here is dereferenceable. A pointer that is
+; dereferenceable can be loaded from speculatively without a risk of trapping.
+; Since it is OK to speculate, PRE is allowed.
+
+define i32 @test15(i32* noalias nocapture readonly dereferenceable(8) %x, i32* noalias nocapture %r, i32 %a) {
+
+; CHECK-LABEL: @test15
+; CHECK: entry:
+; CHECK-NEXT: icmp eq
+; CHECK-NEXT: br i1
+
+entry:
+ %tobool = icmp eq i32 %a, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: entry.if.end_crit_edge:
+; CHECK-NEXT: %vv.pre = load i32, i32* %x, align 4
+; CHECK-NEXT: br label %if.end
+
+if.then:
+ %uu = load i32, i32* %x, align 4
+ store i32 %uu, i32* %r, align 4
+ br label %if.end
+
+; CHECK: if.then:
+; CHECK-NEXT: %uu = load i32, i32* %x, align 4
+; CHECK-NEXT: store i32 %uu, i32* %r, align 4
+; CHECK-NEXT: br label %if.end
+
+if.end:
+ call void @f()
+ %vv = load i32, i32* %x, align 4
+ ret i32 %vv
+
+; CHECK: if.end:
+; CHECK-NEXT: %vv = phi i32 [ %vv.pre, %entry.if.end_crit_edge ], [ %uu, %if.then ]
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: ret i32 %vv
+
+}
+
+; Same as test14, but %x here is dereferenceable. A pointer that is
+; dereferenceable can be loaded from speculatively without a risk of trapping.
+; Since it is OK to speculate, PRE is allowed.
+
+define i32 @test16(i32* noalias nocapture readonly dereferenceable(8) %x, i32* noalias nocapture %r, i32 %a) {
+
+; CHECK-LABEL: @test16(
+; CHECK: entry:
+; CHECK-NEXT: icmp eq
+; CHECK-NEXT: br i1
+
+entry:
+ %tobool = icmp eq i32 %a, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: entry.if.end_crit_edge:
+; CHECK-NEXT: %vv.pre = load i32, i32* %x, align 4
+; CHECK-NEXT: br label %if.end
+
+if.then:
+ %uu = load i32, i32* %x, align 4
+ store i32 %uu, i32* %r, align 4
+ br label %if.end
+
+; CHECK: if.then:
+; CHECK-NEXT: %uu = load i32, i32* %x, align 4
+; CHECK-NEXT: store i32 %uu, i32* %r, align 4
+; CHECK-NEXT: br label %if.end
+
+if.end:
+ call void @f()
+ br label %follow_1
+
+; CHECK: if.end:
+; CHECK-NEXT: %vv = phi i32 [ %vv.pre, %entry.if.end_crit_edge ], [ %uu, %if.then ]
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: ret i32 %vv
+
+follow_1:
+ br label %follow_2
+
+follow_2:
+ %vv = load i32, i32* %x, align 4
+ ret i32 %vv
+}