From: Teresa Johnson Date: Wed, 6 Mar 2019 14:57:40 +0000 (+0000) Subject: [CGP] Avoid repeatedly building DominatorTree causing long compile-time (NFC) X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f383764b620b5fd6ff6e2190e83ed26dcc548f28;p=llvm [CGP] Avoid repeatedly building DominatorTree causing long compile-time (NFC) Summary: In r354298 a DominatorTree construction was added via new function combineToUSubWithOverflow, which was subsequently restructured into replaceMathCmpWithIntrinsic in r354689. We are hitting a very long compile time due to this repeated construction, once per math cmp in the function. We shouldn't need to build the DominatorTree more than once per function, except when a transformation invalidates it. There is already a boolean flag that is returned from these methods indicating whether the DT has been modified. We can simply build the DT once per Function walk in CodeGenPrepare::runOnFunction, since any time a change is made we break out of the Function walk and restart it. I modified the code so that both replaceMathCmpWithIntrinsic as well as mergeSExts (which was also building a DT) use the DT constructed by the run method. From -mllvm -time-passes: Before this patch: CodeGen Prepare user time is 328s With this patch: CodeGen Prepare user time is 21s Reviewers: spatel Subscribers: jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D58995 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@355512 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 3996bdff8fc..ab11df2e982 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -345,8 +345,8 @@ class TypePromotionTransaction; void eliminateMostlyEmptyBlock(BasicBlock *BB); bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB, bool isPreheader); - bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT); - bool optimizeInst(Instruction *I, bool &ModifiedDT); + bool optimizeBlock(BasicBlock &BB, DominatorTree &DT, bool &ModifiedDT); + bool optimizeInst(Instruction *I, DominatorTree &DT, bool &ModifiedDT); bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy, unsigned AddrSpace); bool optimizeInlineAsmInst(CallInst *CS); @@ -366,7 +366,7 @@ class TypePromotionTransaction; const SmallVectorImpl &Exts, SmallVectorImpl &ProfitablyMovedExts, unsigned CreatedInstsCost = 0); - bool mergeSExts(Function &F); + bool mergeSExts(Function &F, DominatorTree &DT); bool splitLargeGEPOffsets(); bool performAddressTypePromotion( Instruction *&Inst, @@ -455,17 +455,18 @@ bool CodeGenPrepare::runOnFunction(Function &F) { bool MadeChange = true; while (MadeChange) { MadeChange = false; + DominatorTree DT(F); for (Function::iterator I = F.begin(); I != F.end(); ) { BasicBlock *BB = &*I++; bool ModifiedDTOnIteration = false; - MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration); + MadeChange |= optimizeBlock(*BB, DT, ModifiedDTOnIteration); // Restart BB iteration if the dominator tree of the Function was changed if (ModifiedDTOnIteration) break; } if (EnableTypePromotionMerge && !ValToSExtendedUses.empty()) - MadeChange |= mergeSExts(F); + MadeChange |= mergeSExts(F, DT); if (!LargeOffsetGEPMap.empty()) MadeChange |= splitLargeGEPOffsets(); @@ -1160,7 +1161,7 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, } static bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp, - Intrinsic::ID IID) { + Intrinsic::ID IID, DominatorTree &DT) { // We allow matching the canonical IR (add X, C) back to (usubo X, -C). Value *Arg0 = BO->getOperand(0); Value *Arg1 = BO->getOperand(1); @@ -1179,7 +1180,6 @@ static bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp, } else { // The math and compare may be independent instructions. Check dominance to // determine the insertion point for the intrinsic. - DominatorTree DT(*BO->getFunction()); bool MathDominates = DT.dominates(BO, Cmp); if (!MathDominates && !DT.dominates(Cmp, BO)) return false; @@ -1230,7 +1230,8 @@ static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp, /// Try to combine the compare into a call to the llvm.uadd.with.overflow /// intrinsic. Return true if any changes were made. static bool combineToUAddWithOverflow(CmpInst *Cmp, const TargetLowering &TLI, - const DataLayout &DL, bool &ModifiedDT) { + const DataLayout &DL, DominatorTree &DT, + bool &ModifiedDT) { Value *A, *B; BinaryOperator *Add; if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) @@ -1247,7 +1248,7 @@ static bool combineToUAddWithOverflow(CmpInst *Cmp, const TargetLowering &TLI, if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse()) return false; - if (!replaceMathCmpWithIntrinsic(Add, Cmp, Intrinsic::uadd_with_overflow)) + if (!replaceMathCmpWithIntrinsic(Add, Cmp, Intrinsic::uadd_with_overflow, DT)) return false; // Reset callers - do not crash by iterating over a dead instruction. @@ -1256,7 +1257,8 @@ static bool combineToUAddWithOverflow(CmpInst *Cmp, const TargetLowering &TLI, } static bool combineToUSubWithOverflow(CmpInst *Cmp, const TargetLowering &TLI, - const DataLayout &DL, bool &ModifiedDT) { + const DataLayout &DL, DominatorTree &DT, + bool &ModifiedDT) { // Convert (A u> B) to (A u< B) to simplify pattern matching. Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1); ICmpInst::Predicate Pred = Cmp->getPredicate(); @@ -1304,7 +1306,7 @@ static bool combineToUSubWithOverflow(CmpInst *Cmp, const TargetLowering &TLI, TLI.getValueType(DL, Sub->getType()))) return false; - if (!replaceMathCmpWithIntrinsic(Sub, Cmp, Intrinsic::usub_with_overflow)) + if (!replaceMathCmpWithIntrinsic(Sub, Cmp, Intrinsic::usub_with_overflow, DT)) return false; // Reset callers - do not crash by iterating over a dead instruction. @@ -1379,14 +1381,15 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) { } static bool optimizeCmp(CmpInst *Cmp, const TargetLowering &TLI, - const DataLayout &DL, bool &ModifiedDT) { + const DataLayout &DL, DominatorTree &DT, + bool &ModifiedDT) { if (sinkCmpExpression(Cmp, TLI)) return true; - if (combineToUAddWithOverflow(Cmp, TLI, DL, ModifiedDT)) + if (combineToUAddWithOverflow(Cmp, TLI, DL, DT, ModifiedDT)) return true; - if (combineToUSubWithOverflow(Cmp, TLI, DL, ModifiedDT)) + if (combineToUSubWithOverflow(Cmp, TLI, DL, DT, ModifiedDT)) return true; return false; @@ -5169,8 +5172,7 @@ bool CodeGenPrepare::tryToPromoteExts( } /// Merging redundant sexts when one is dominating the other. -bool CodeGenPrepare::mergeSExts(Function &F) { - DominatorTree DT(F); +bool CodeGenPrepare::mergeSExts(Function &F, DominatorTree &DT) { bool Changed = false; for (auto &Entry : ValToSExtendedUses) { SExts &Insts = Entry.second; @@ -6827,7 +6829,8 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, return true; } -bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { +bool CodeGenPrepare::optimizeInst(Instruction *I, DominatorTree &DT, + bool &ModifiedDT) { // Bail out if we inserted the instruction to prevent optimizations from // stepping on each other's toes. if (InsertedInsts.count(I)) @@ -6876,7 +6879,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { } if (auto *Cmp = dyn_cast(I)) - if (TLI && optimizeCmp(Cmp, *TLI, *DL, ModifiedDT)) + if (TLI && optimizeCmp(Cmp, *TLI, *DL, DT, ModifiedDT)) return true; if (LoadInst *LI = dyn_cast(I)) { @@ -6938,7 +6941,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { GEPI->replaceAllUsesWith(NC); GEPI->eraseFromParent(); ++NumGEPsElim; - optimizeInst(NC, ModifiedDT); + optimizeInst(NC, DT, ModifiedDT); return true; } if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) { @@ -6989,13 +6992,14 @@ static bool makeBitReverse(Instruction &I, const DataLayout &DL, // In this pass we look for GEP and cast instructions that are used // across basic blocks and rewrite them to improve basic-block-at-a-time // selection. -bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) { +bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, DominatorTree &DT, + bool &ModifiedDT) { SunkAddrs.clear(); bool MadeChange = false; CurInstIterator = BB.begin(); while (CurInstIterator != BB.end()) { - MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT); + MadeChange |= optimizeInst(&*CurInstIterator++, DT, ModifiedDT); if (ModifiedDT) return true; }