From c93c3839e7f72f1960cb56bd403580965d87c887 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 19 Dec 2018 19:56:21 +0000 Subject: [PATCH] [BDCE][DemandedBits] Detect dead uses of undead instructions This (mostly) fixes https://bugs.llvm.org/show_bug.cgi?id=39771. BDCE currently detects instructions that don't have any demanded bits and replaces their uses with zero. However, if an instruction has multiple uses, then some of the uses may be dead (have no demanded bits) even though the instruction itself is still live. This patch extends DemandedBits/BDCE to detect such uses and replace them with zero. While this will not immediately render any instructions dead, it may lead to simplifications (in the motivating case, by converting a rotate into a simple shift), break dependencies, etc. The implementation tries to strike a balance between analysis power and complexity/memory usage. Originally I wanted to track demanded bits on a per-use level, but ultimately we're only really interested in whether a use is entirely dead or not. I'm using an extra set to track which uses are dead. However, as initially all uses are dead, I'm not storing uses those user is also dead. This case is checked separately instead. The test case has a couple of cases that are not simplified yet. In particular, we're only looking at uses of instructions right now. I think it would make sense to also extend this to arguments. Furthermore DemandedBits doesn't yet know some of the tricks that InstCombine does for the demanded bits or bitwise or/and/xor in combination with known bits information. Differential Revision: https://reviews.llvm.org/D55563 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@349674 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Analysis/DemandedBits.h | 6 ++++ lib/Analysis/DemandedBits.cpp | 47 ++++++++++++++++++++++++---- lib/Transforms/Scalar/BDCE.cpp | 38 +++++++++++++--------- test/Transforms/BDCE/dead-uses.ll | 4 +-- 4 files changed, 72 insertions(+), 23 deletions(-) diff --git a/include/llvm/Analysis/DemandedBits.h b/include/llvm/Analysis/DemandedBits.h index f751d2edcff..f16d12552f7 100644 --- a/include/llvm/Analysis/DemandedBits.h +++ b/include/llvm/Analysis/DemandedBits.h @@ -57,6 +57,9 @@ public: /// Return true if, during analysis, I could not be reached. bool isInstructionDead(Instruction *I); + /// Return whether this use is dead by means of not having any demanded bits. + bool isUseDead(Use *U); + void print(raw_ostream &OS); private: @@ -75,6 +78,9 @@ private: // The set of visited instructions (non-integer-typed only). SmallPtrSet Visited; DenseMap AliveBits; + // Uses with no demanded bits. If the user also has no demanded bits, the use + // might not be stored explicitly in this map, to save memory during analysis. + SmallPtrSet DeadUses; }; class DemandedBitsWrapperPass : public FunctionPass { diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp index 0382787fbef..fcc2fd8962c 100644 --- a/lib/Analysis/DemandedBits.cpp +++ b/lib/Analysis/DemandedBits.cpp @@ -314,6 +314,7 @@ void DemandedBits::performAnalysis() { Visited.clear(); AliveBits.clear(); + DeadUses.clear(); SmallVector Worklist; @@ -374,26 +375,35 @@ void DemandedBits::performAnalysis() { Type *T = I->getType(); if (T->isIntOrIntVectorTy()) { unsigned BitWidth = T->getScalarSizeInBits(); + + // Previous demanded bits information for this use. + APInt ABPrev(BitWidth, 0); + auto ABI = AliveBits.find(I); + if (ABI != AliveBits.end()) + ABPrev = ABI->second; + APInt AB = APInt::getAllOnesValue(BitWidth); if (UserI->getType()->isIntOrIntVectorTy() && !AOut && !isAlwaysLive(UserI)) { + // If all bits of the output are dead, then all bits of the input + // are also dead. AB = APInt(BitWidth, 0); } else { - // If all bits of the output are dead, then all bits of the input // Bits of each operand that are used to compute alive bits of the // output are alive, all others are dead. determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB, Known, Known2); + + // Keep track of uses which have no demanded bits. + if (AB.isNullValue()) + DeadUses.insert(&OI); + else if (ABPrev.isNullValue()) + DeadUses.erase(&OI); } // If we've added to the set of alive bits (or the operand has not // been previously visited), then re-queue the operand to be visited // again. - APInt ABPrev(BitWidth, 0); - auto ABI = AliveBits.find(I); - if (ABI != AliveBits.end()) - ABPrev = ABI->second; - APInt ABNew = AB | ABPrev; if (ABNew != ABPrev || ABI == AliveBits.end()) { AliveBits[I] = std::move(ABNew); @@ -426,6 +436,31 @@ bool DemandedBits::isInstructionDead(Instruction *I) { !isAlwaysLive(I); } +bool DemandedBits::isUseDead(Use *U) { + // We only track integer uses, everything else is assumed live. + if (!(*U)->getType()->isIntOrIntVectorTy()) + return false; + + // Uses by always-live instructions are never dead. + Instruction *UserI = cast(U->getUser()); + if (isAlwaysLive(UserI)) + return false; + + performAnalysis(); + if (DeadUses.count(U)) + return true; + + // If no output bits are demanded, no input bits are demanded and the use + // is dead. These uses might not be explicitly present in the DeadUses map. + if (UserI->getType()->isIntOrIntVectorTy()) { + auto Found = AliveBits.find(UserI); + if (Found != AliveBits.end() && Found->second.isNullValue()) + return true; + } + + return false; +} + void DemandedBits::print(raw_ostream &OS) { performAnalysis(); for (auto &KV : AliveBits) { diff --git a/lib/Transforms/Scalar/BDCE.cpp b/lib/Transforms/Scalar/BDCE.cpp index f63182e57c1..65a68c17977 100644 --- a/lib/Transforms/Scalar/BDCE.cpp +++ b/lib/Transforms/Scalar/BDCE.cpp @@ -96,30 +96,38 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) { if (I.mayHaveSideEffects() && I.use_empty()) continue; - if (I.getType()->isIntOrIntVectorTy() && - !DB.getDemandedBits(&I).getBoolValue()) { - // For live instructions that have all dead bits, first make them dead by - // replacing all uses with something else. Then, if they don't need to - // remain live (because they have side effects, etc.) we can remove them. - LLVM_DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n"); + // Remove instructions not reached during analysis. + if (DB.isInstructionDead(&I)) { + salvageDebugInfo(I); + Worklist.push_back(&I); + I.dropAllReferences(); + Changed = true; + continue; + } + + for (Use &U : I.operands()) { + // DemandedBits only detects dead integer uses. + if (!U->getType()->isIntOrIntVectorTy()) + continue; + + // TODO: We could also find dead non-instruction uses, e.g. arguments. + if (!isa(U)) + continue; + + if (!DB.isUseDead(&U)) + continue; + + LLVM_DEBUG(dbgs() << "BDCE: Trivializing: " << U << " (all bits dead)\n"); clearAssumptionsOfUsers(&I, DB); // FIXME: In theory we could substitute undef here instead of zero. // This should be reconsidered once we settle on the semantics of // undef, poison, etc. - Value *Zero = ConstantInt::get(I.getType(), 0); + U.set(ConstantInt::get(U->getType(), 0)); ++NumSimplified; - I.replaceNonMetadataUsesWith(Zero); Changed = true; } - if (!DB.isInstructionDead(&I)) - continue; - - salvageDebugInfo(I); - Worklist.push_back(&I); - I.dropAllReferences(); - Changed = true; } for (Instruction *&I : Worklist) { diff --git a/test/Transforms/BDCE/dead-uses.ll b/test/Transforms/BDCE/dead-uses.ll index 95c2893871f..94a3e07600f 100644 --- a/test/Transforms/BDCE/dead-uses.ll +++ b/test/Transforms/BDCE/dead-uses.ll @@ -10,7 +10,7 @@ declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) define i32 @pr39771_fshr_multi_use_instr(i32 %a) { ; CHECK-LABEL: @pr39771_fshr_multi_use_instr( ; CHECK-NEXT: [[X:%.*]] = or i32 [[A:%.*]], 0 -; CHECK-NEXT: [[B:%.*]] = tail call i32 @llvm.fshr.i32(i32 [[X]], i32 [[X]], i32 1) +; CHECK-NEXT: [[B:%.*]] = tail call i32 @llvm.fshr.i32(i32 0, i32 [[X]], i32 1) ; CHECK-NEXT: [[C:%.*]] = lshr i32 [[B]], 23 ; CHECK-NEXT: [[D:%.*]] = xor i32 [[C]], [[B]] ; CHECK-NEXT: [[E:%.*]] = and i32 [[D]], 31 @@ -28,7 +28,7 @@ define i32 @pr39771_fshr_multi_use_instr(i32 %a) { define <2 x i32> @pr39771_fshr_multi_use_instr_vec(<2 x i32> %a) { ; CHECK-LABEL: @pr39771_fshr_multi_use_instr_vec( ; CHECK-NEXT: [[X:%.*]] = or <2 x i32> [[A:%.*]], zeroinitializer -; CHECK-NEXT: [[B:%.*]] = tail call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X]], <2 x i32> [[X]], <2 x i32> ) +; CHECK-NEXT: [[B:%.*]] = tail call <2 x i32> @llvm.fshr.v2i32(<2 x i32> zeroinitializer, <2 x i32> [[X]], <2 x i32> ) ; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], ; CHECK-NEXT: [[D:%.*]] = xor <2 x i32> [[C]], [[B]] ; CHECK-NEXT: [[E:%.*]] = and <2 x i32> [[D]], -- 2.50.1