From 60eee05a65b3742e87c82581857e6f6374ade03c Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@playingwithpointers.com>
Date: Thu, 16 Jun 2016 20:47:57 +0000
Subject: [PATCH] [EarlyCSE] Fold invariant loads

Redundant invariant loads can be CSE'ed with very little extra effort
over what early-cse already tracks, so it looks reasonable to make
early-cse handle this case.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@272954 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Scalar/EarlyCSE.cpp          | 30 +++++--
 test/Transforms/EarlyCSE/invariant-loads.ll | 99 +++++++++++++++++++++
 2 files changed, 121 insertions(+), 8 deletions(-)
 create mode 100644 test/Transforms/EarlyCSE/invariant-loads.ll

diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 4145bca367b..48253f62634 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -283,12 +283,14 @@ public:
     unsigned Generation;
     int MatchingId;
     bool IsAtomic;
+    bool IsInvariant;
     LoadValue()
-        : DefInst(nullptr), Generation(0), MatchingId(-1), IsAtomic(false) {}
+        : DefInst(nullptr), Generation(0), MatchingId(-1), IsAtomic(false),
+          IsInvariant(false) {}
     LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
-              bool IsAtomic)
-        : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
-          IsAtomic(IsAtomic) {}
+              bool IsAtomic, bool IsInvariant)
+        : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
+          IsAtomic(IsAtomic), IsInvariant(IsInvariant) {}
   };
   typedef RecyclingAllocator<BumpPtrAllocator,
                              ScopedHashTableVal<Value *, LoadValue>>
@@ -430,6 +432,11 @@ private:
       return true;
     }
 
+    bool isInvariantLoad() const {
+      if (auto *LI = dyn_cast<LoadInst>(Inst))
+        return LI->getMetadata(LLVMContext::MD_invariant_load);
+      return false;
+    }
 
     bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
       return (getPointerOperand() == Inst.getPointerOperand() &&
@@ -612,9 +619,16 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
     }
 
     // If we have an available version of this load, and if it is the right
-    // generation, replace this instruction.
+    // generation or the load is known to be from an invariant location,
+    // replace this instruction.
+    //
+    // A dominating invariant load implies that the location loaded from is
+    // unchanging beginning at the point of the invariant load, so the load
+    // we're CSE'ing _away_ does not need to be invariant, only the available
+    // load we're CSE'ing _to_ does.
     LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
-    if (InVal.DefInst != nullptr && InVal.Generation == CurrentGeneration &&
+    if (InVal.DefInst != nullptr &&
+        (InVal.Generation == CurrentGeneration || InVal.IsInvariant) &&
         InVal.MatchingId == MemInst.getMatchingId() &&
         // We don't yet handle removing loads with ordering of any kind.
         !MemInst.isVolatile() && MemInst.isUnordered() &&
@@ -637,7 +651,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
       AvailableLoads.insert(
           MemInst.getPointerOperand(),
           LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
-                    MemInst.isAtomic()));
+                    MemInst.isAtomic(), MemInst.isInvariantLoad()));
       LastStore = nullptr;
       continue;
     }
@@ -749,7 +763,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
       AvailableLoads.insert(
          MemInst.getPointerOperand(),
          LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
-                   MemInst.isAtomic()));
+                   MemInst.isAtomic(), false));
 
       // Remember that this was the last unordered store we saw for DSE. We
       // don't yet handle DSE on ordered or volatile stores since we don't
diff --git a/test/Transforms/EarlyCSE/invariant-loads.ll b/test/Transforms/EarlyCSE/invariant-loads.ll
new file mode 100644
index 00000000000..04c7dd1372d
--- /dev/null
+++ b/test/Transforms/EarlyCSE/invariant-loads.ll
@@ -0,0 +1,99 @@
+; RUN: opt -S -early-cse < %s | FileCheck %s
+
+declare void @clobber_and_use(i32)
+
+define void @f_0(i32* %ptr) {
+; CHECK-LABEL: @f_0(
+; CHECK: %val0 = load i32, i32* %ptr, !invariant.load !0
+; CHECK: call void @clobber_and_use(i32 %val0)
+; CHECK: call void @clobber_and_use(i32 %val0)
+; CHECK: call void @clobber_and_use(i32 %val0)
+; CHECK: ret void
+
+  %val0 = load i32, i32* %ptr, !invariant.load !{}
+  call void @clobber_and_use(i32 %val0)
+  %val1 = load i32, i32* %ptr, !invariant.load !{}
+  call void @clobber_and_use(i32 %val1)
+  %val2 = load i32, i32* %ptr, !invariant.load !{}
+  call void @clobber_and_use(i32 %val2)
+  ret void
+}
+
+define void @f_1(i32* %ptr) {
+; We can forward invariant loads to non-invariant loads, since once an
+; invariant load has executed, the location loaded from is known to be
+; unchanging.
+
+; CHECK-LABEL: @f_1(
+; CHECK: %val0 = load i32, i32* %ptr, !invariant.load !0
+; CHECK: call void @clobber_and_use(i32 %val0)
+; CHECK: call void @clobber_and_use(i32 %val0)
+
+  %val0 = load i32, i32* %ptr, !invariant.load !{}
+  call void @clobber_and_use(i32 %val0)
+  %val1 = load i32, i32* %ptr
+  call void @clobber_and_use(i32 %val1)
+  ret void
+}
+
+define void @f_2(i32* %ptr) {
+; Negative test -- we can't forward a non-invariant load into an
+; invariant load.
+
+; CHECK-LABEL: @f_2(
+; CHECK: %val0 = load i32, i32* %ptr
+; CHECK: call void @clobber_and_use(i32 %val0)
+; CHECK: %val1 = load i32, i32* %ptr, !invariant.load !0
+; CHECK: call void @clobber_and_use(i32 %val1)
+
+  %val0 = load i32, i32* %ptr
+  call void @clobber_and_use(i32 %val0)
+  %val1 = load i32, i32* %ptr, !invariant.load !{}
+  call void @clobber_and_use(i32 %val1)
+  ret void
+}
+
+define void @f_3(i1 %cond, i32* %ptr) {
+; CHECK-LABEL: @f_3(
+  %val0 = load i32, i32* %ptr, !invariant.load !{}
+  call void @clobber_and_use(i32 %val0)
+  br i1 %cond, label %left, label %right
+
+; CHECK: %val0 = load i32, i32* %ptr, !invariant.load !0
+; CHECK: left:
+; CHECK-NEXT: call void @clobber_and_use(i32 %val0)
+
+left:
+  %val1 = load i32, i32* %ptr
+  call void @clobber_and_use(i32 %val1)
+  ret void
+
+right:
+  ret void
+}
+
+define void @f_4(i1 %cond, i32* %ptr) {
+; Negative test -- can't forward %val0 to %val1 because that'll break
+; def-dominates-use.
+
+; CHECK-LABEL: @f_4(
+  br i1 %cond, label %left, label %merge
+
+left:
+; CHECK: left:
+; CHECK-NEXT: %val0 = load i32, i32* %ptr, !invariant.load !
+; CHECK-NEXT: call void @clobber_and_use(i32 %val0)
+
+  %val0 = load i32, i32* %ptr, !invariant.load !{}
+  call void @clobber_and_use(i32 %val0)
+  br label %merge
+
+merge:
+; CHECK: merge:
+; CHECK-NEXT: %val1 = load i32, i32* %ptr
+; CHECK-NEXT: call void @clobber_and_use(i32 %val1)
+
+  %val1 = load i32, i32* %ptr
+  call void @clobber_and_use(i32 %val1)
+  ret void
+}
-- 
2.50.1