From e00207538cd103c6ce1d7ee95ec575401b6c7bf2 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 1 Jan 2019 10:05:26 +0000 Subject: [PATCH] Reapply "[BDCE][DemandedBits] Detect dead uses of undead instructions" This (mostly) fixes https://bugs.llvm.org/show_bug.cgi?id=39771. BDCE currently detects instructions that don't have any demanded bits and replaces their uses with zero. However, if an instruction has multiple uses, then some of the uses may be dead (have no demanded bits) even though the instruction itself is still live. This patch extends DemandedBits/BDCE to detect such uses and replace them with zero. While this will not immediately render any instructions dead, it may lead to simplifications (in the motivating case, by converting a rotate into a simple shift), break dependencies, etc. The implementation tries to strike a balance between analysis power and complexity/memory usage. Originally I wanted to track demanded bits on a per-use level, but ultimately we're only really interested in whether a use is entirely dead or not. I'm using an extra set to track which uses are dead. However, as initially all uses are dead, I'm not storing uses those user is also dead. This case is checked separately instead. The previous attempt to land this lead to miscompiles, because cases where uses were initially dead but were later found to be live during further analysis were not always correctly removed from the DeadUses set. This is fixed now and the added test case demanstrates such an instance. Differential Revision: https://reviews.llvm.org/D55563 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350188 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Analysis/DemandedBits.h | 6 +++++ lib/Analysis/DemandedBits.cpp | 38 ++++++++++++++++++++++++++-- lib/Transforms/Scalar/BDCE.cpp | 38 +++++++++++++++++----------- test/Transforms/BDCE/dead-uses.ll | 29 +++++++++++++++++++-- 4 files changed, 92 insertions(+), 19 deletions(-) diff --git a/include/llvm/Analysis/DemandedBits.h b/include/llvm/Analysis/DemandedBits.h index f751d2edcff..f16d12552f7 100644 --- a/include/llvm/Analysis/DemandedBits.h +++ b/include/llvm/Analysis/DemandedBits.h @@ -57,6 +57,9 @@ public: /// Return true if, during analysis, I could not be reached. bool isInstructionDead(Instruction *I); + /// Return whether this use is dead by means of not having any demanded bits. + bool isUseDead(Use *U); + void print(raw_ostream &OS); private: @@ -75,6 +78,9 @@ private: // The set of visited instructions (non-integer-typed only). SmallPtrSet Visited; DenseMap AliveBits; + // Uses with no demanded bits. If the user also has no demanded bits, the use + // might not be stored explicitly in this map, to save memory during analysis. + SmallPtrSet DeadUses; }; class DemandedBitsWrapperPass : public FunctionPass { diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp index 0382787fbef..4aaa1182116 100644 --- a/lib/Analysis/DemandedBits.cpp +++ b/lib/Analysis/DemandedBits.cpp @@ -314,6 +314,7 @@ void DemandedBits::performAnalysis() { Visited.clear(); AliveBits.clear(); + DeadUses.clear(); SmallVector Worklist; @@ -358,7 +359,8 @@ void DemandedBits::performAnalysis() { APInt AOut; if (UserI->getType()->isIntOrIntVectorTy()) { AOut = AliveBits[UserI]; - LLVM_DEBUG(dbgs() << " Alive Out: " << AOut); + LLVM_DEBUG(dbgs() << " Alive Out: 0x" + << Twine::utohexstr(AOut.getLimitedValue())); } LLVM_DEBUG(dbgs() << "\n"); @@ -377,13 +379,20 @@ void DemandedBits::performAnalysis() { APInt AB = APInt::getAllOnesValue(BitWidth); if (UserI->getType()->isIntOrIntVectorTy() && !AOut && !isAlwaysLive(UserI)) { + // If all bits of the output are dead, then all bits of the input + // are also dead. AB = APInt(BitWidth, 0); } else { - // If all bits of the output are dead, then all bits of the input // Bits of each operand that are used to compute alive bits of the // output are alive, all others are dead. determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB, Known, Known2); + + // Keep track of uses which have no demanded bits. + if (AB.isNullValue()) + DeadUses.insert(&OI); + else + DeadUses.erase(&OI); } // If we've added to the set of alive bits (or the operand has not @@ -426,6 +435,31 @@ bool DemandedBits::isInstructionDead(Instruction *I) { !isAlwaysLive(I); } +bool DemandedBits::isUseDead(Use *U) { + // We only track integer uses, everything else is assumed live. + if (!(*U)->getType()->isIntOrIntVectorTy()) + return false; + + // Uses by always-live instructions are never dead. + Instruction *UserI = cast(U->getUser()); + if (isAlwaysLive(UserI)) + return false; + + performAnalysis(); + if (DeadUses.count(U)) + return true; + + // If no output bits are demanded, no input bits are demanded and the use + // is dead. These uses might not be explicitly present in the DeadUses map. + if (UserI->getType()->isIntOrIntVectorTy()) { + auto Found = AliveBits.find(UserI); + if (Found != AliveBits.end() && Found->second.isNullValue()) + return true; + } + + return false; +} + void DemandedBits::print(raw_ostream &OS) { performAnalysis(); for (auto &KV : AliveBits) { diff --git a/lib/Transforms/Scalar/BDCE.cpp b/lib/Transforms/Scalar/BDCE.cpp index f63182e57c1..65a68c17977 100644 --- a/lib/Transforms/Scalar/BDCE.cpp +++ b/lib/Transforms/Scalar/BDCE.cpp @@ -96,30 +96,38 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) { if (I.mayHaveSideEffects() && I.use_empty()) continue; - if (I.getType()->isIntOrIntVectorTy() && - !DB.getDemandedBits(&I).getBoolValue()) { - // For live instructions that have all dead bits, first make them dead by - // replacing all uses with something else. Then, if they don't need to - // remain live (because they have side effects, etc.) we can remove them. - LLVM_DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n"); + // Remove instructions not reached during analysis. + if (DB.isInstructionDead(&I)) { + salvageDebugInfo(I); + Worklist.push_back(&I); + I.dropAllReferences(); + Changed = true; + continue; + } + + for (Use &U : I.operands()) { + // DemandedBits only detects dead integer uses. + if (!U->getType()->isIntOrIntVectorTy()) + continue; + + // TODO: We could also find dead non-instruction uses, e.g. arguments. + if (!isa(U)) + continue; + + if (!DB.isUseDead(&U)) + continue; + + LLVM_DEBUG(dbgs() << "BDCE: Trivializing: " << U << " (all bits dead)\n"); clearAssumptionsOfUsers(&I, DB); // FIXME: In theory we could substitute undef here instead of zero. // This should be reconsidered once we settle on the semantics of // undef, poison, etc. - Value *Zero = ConstantInt::get(I.getType(), 0); + U.set(ConstantInt::get(U->getType(), 0)); ++NumSimplified; - I.replaceNonMetadataUsesWith(Zero); Changed = true; } - if (!DB.isInstructionDead(&I)) - continue; - - salvageDebugInfo(I); - Worklist.push_back(&I); - I.dropAllReferences(); - Changed = true; } for (Instruction *&I : Worklist) { diff --git a/test/Transforms/BDCE/dead-uses.ll b/test/Transforms/BDCE/dead-uses.ll index 95c2893871f..46efa27b62b 100644 --- a/test/Transforms/BDCE/dead-uses.ll +++ b/test/Transforms/BDCE/dead-uses.ll @@ -10,7 +10,7 @@ declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) define i32 @pr39771_fshr_multi_use_instr(i32 %a) { ; CHECK-LABEL: @pr39771_fshr_multi_use_instr( ; CHECK-NEXT: [[X:%.*]] = or i32 [[A:%.*]], 0 -; CHECK-NEXT: [[B:%.*]] = tail call i32 @llvm.fshr.i32(i32 [[X]], i32 [[X]], i32 1) +; CHECK-NEXT: [[B:%.*]] = tail call i32 @llvm.fshr.i32(i32 0, i32 [[X]], i32 1) ; CHECK-NEXT: [[C:%.*]] = lshr i32 [[B]], 23 ; CHECK-NEXT: [[D:%.*]] = xor i32 [[C]], [[B]] ; CHECK-NEXT: [[E:%.*]] = and i32 [[D]], 31 @@ -28,7 +28,7 @@ define i32 @pr39771_fshr_multi_use_instr(i32 %a) { define <2 x i32> @pr39771_fshr_multi_use_instr_vec(<2 x i32> %a) { ; CHECK-LABEL: @pr39771_fshr_multi_use_instr_vec( ; CHECK-NEXT: [[X:%.*]] = or <2 x i32> [[A:%.*]], zeroinitializer -; CHECK-NEXT: [[B:%.*]] = tail call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X]], <2 x i32> [[X]], <2 x i32> ) +; CHECK-NEXT: [[B:%.*]] = tail call <2 x i32> @llvm.fshr.v2i32(<2 x i32> zeroinitializer, <2 x i32> [[X]], <2 x i32> ) ; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], ; CHECK-NEXT: [[D:%.*]] = xor <2 x i32> [[C]], [[B]] ; CHECK-NEXT: [[E:%.*]] = and <2 x i32> [[D]], @@ -77,3 +77,28 @@ define i32 @pr39771_expanded_fshr_multi_use(i32 %a) { %e = and i32 %d, 31 ret i32 %e } + +; %b operand of %c will be dead initially, but later found live. +define void @dead_use_invalidation(i32 %a) { +; CHECK-LABEL: @dead_use_invalidation( +; CHECK-NEXT: [[B:%.*]] = or i32 [[A:%.*]], 0 +; CHECK-NEXT: [[C:%.*]] = shl i32 [[B]], 31 +; CHECK-NEXT: [[D:%.*]] = and i32 [[C]], 1 +; CHECK-NEXT: [[E:%.*]] = or i32 [[C]], 0 +; CHECK-NEXT: [[F:%.*]] = or i32 [[D]], 0 +; CHECK-NEXT: call void @dummy(i32 [[E]]) +; CHECK-NEXT: call void @dummy(i32 [[F]]) +; CHECK-NEXT: call void @dummy(i32 [[B]]) +; CHECK-NEXT: ret void +; + %b = or i32 %a, 0 + %c = shl i32 %b, 31 + %d = and i32 %c, 1 + %e = or i32 %c, 0 + %f = or i32 %d, 0 + call void @dummy(i32 %e) + call void @dummy(i32 %f) + call void @dummy(i32 %b) + ret void +} +declare void @dummy(i32) -- 2.50.1