From: Rong Xu Date: Tue, 28 May 2019 21:45:56 +0000 (+0000) Subject: [PGO] Handle cases of failing to split critical edges X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8f580be75c44e6843d11c3f893579a56132dffd6;p=llvm [PGO] Handle cases of failing to split critical edges Fix PR41279 where critical edges to EHPad are not split. The fix is to not instrument those critical edges. We used to be able to know the size of counters right after MST is computed. With this, we have to pre-collect the instrument BBs to know the size, and then instrument them. Differential Revision: https://reviews.llvm.org/D62439 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@361882 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index d5267a86e2d..6996e7a0502 100644 --- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -573,6 +573,10 @@ public: // The Minimum Spanning Tree of function CFG. CFGMST MST; + // Collect all the BBs that will be instrumented, and store them in + // InstrumentBBs. + void getInstrumentBBs(std::vector &InstrumentBBs); + // Give an edge, find the BB that will be instrumented. // Return nullptr if there is no BB to be instrumented. BasicBlock *getInstrBB(Edge *E); @@ -629,16 +633,6 @@ public: if (CreateGlobalVar) FuncNameVar = createPGOFuncNameVar(F, FuncName); } - - // Return the number of profile counters needed for the function. - unsigned getNumCounters() { - unsigned NumCounters = 0; - for (auto &E : this->MST.AllEdges) { - if (!E->InMST && !E->Removed) - NumCounters++; - } - return NumCounters + SIVisitor.getNumOfSelectInsts(); - } }; } // end anonymous namespace @@ -753,6 +747,24 @@ void FuncPGOInstrumentation::renameComdatFunction() { } } +// Collect all the BBs that will be instruments and return them in +// InstrumentBBs. +template +void FuncPGOInstrumentation::getInstrumentBBs( + std::vector &InstrumentBBs) { + // Use a worklist as we will update the vector during the iteration. + std::vector EdgeList; + EdgeList.reserve(MST.AllEdges.size()); + for (auto &E : MST.AllEdges) + EdgeList.push_back(E.get()); + + for (auto &E : EdgeList) { + BasicBlock *InstrBB = getInstrBB(E); + if (InstrBB) + InstrumentBBs.push_back(InstrBB); + } +} + // Given a CFG E to be instrumented, find which BB to place the instrumented // code. The function will split the critical edge if necessary. template @@ -783,9 +795,18 @@ BasicBlock *FuncPGOInstrumentation::getInstrBB(Edge *E) { << " --> " << getBBInfo(DestBB).Index << "\n"); unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum); - assert(InstrBB && "Critical edge is not split"); - + if (!InstrBB) { + LLVM_DEBUG( + dbgs() << "Fail to split critical edge: not instrument this edge.\n"); + return nullptr; + } + // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. + MST.addEdge(SrcBB, InstrBB, 0); + // Second one: Add new edge of InstrBB->DestBB. + Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); + NewEdge1.InMST = true; E->Removed = true; + return InstrBB; } @@ -801,15 +822,14 @@ static void instrumentOneFunc( FuncPGOInstrumentation FuncInfo(F, ComdatMembers, true, BPI, BFI, IsCS); - unsigned NumCounters = FuncInfo.getNumCounters(); + std::vector InstrumentBBs; + FuncInfo.getInstrumentBBs(InstrumentBBs); + unsigned NumCounters = + InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); uint32_t I = 0; Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); - for (auto &E : FuncInfo.MST.AllEdges) { - BasicBlock *InstrBB = FuncInfo.getInstrBB(E.get()); - if (!InstrBB) - continue; - + for (auto *InstrBB : InstrumentBBs) { IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); assert(Builder.GetInsertPoint() != InstrBB->end() && "Cannot get the Instrumentation point"); @@ -1039,39 +1059,31 @@ private: // edges and the BB. Return false on error. bool PGOUseFunc::setInstrumentedCounts( const std::vector &CountFromProfile) { + + std::vector InstrumentBBs; + FuncInfo.getInstrumentBBs(InstrumentBBs); + unsigned NumCounters = + InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); // The number of counters here should match the number of counters // in profile. Return if they mismatch. - if (FuncInfo.getNumCounters() != CountFromProfile.size()) { + if (NumCounters != CountFromProfile.size()) { return false; } - // Use a worklist as we will update the vector during the iteration. - std::vector WorkList; - for (auto &E : FuncInfo.MST.AllEdges) - WorkList.push_back(E.get()); - uint32_t I = 0; - for (auto &E : WorkList) { - BasicBlock *InstrBB = FuncInfo.getInstrBB(E); - if (!InstrBB) - continue; + for (BasicBlock *InstrBB : InstrumentBBs) { uint64_t CountValue = CountFromProfile[I++]; - if (!E->Removed) { - getBBInfo(InstrBB).setBBInfoCount(CountValue); - E->setEdgeCount(CountValue); - continue; + UseBBInfo &Info = getBBInfo(InstrBB); + Info.setBBInfoCount(CountValue); + // If only one in-edge, the edge profile count should be the same as BB + // profile count. + if (Info.InEdges.size() == 1) { + Info.InEdges[0]->setEdgeCount(CountValue); + } + // If only one out-edge, the edge profile count should be the same as BB + // profile count. + if (Info.OutEdges.size() == 1) { + Info.OutEdges[0]->setEdgeCount(CountValue); } - - // Need to add two new edges. - BasicBlock *SrcBB = const_cast(E->SrcBB); - BasicBlock *DestBB = const_cast(E->DestBB); - // Add new edge of SrcBB->InstrBB. - PGOUseEdge &NewEdge = FuncInfo.MST.addEdge(SrcBB, InstrBB, 0); - NewEdge.setEdgeCount(CountValue); - // Add new edge of InstrBB->DestBB. - PGOUseEdge &NewEdge1 = FuncInfo.MST.addEdge(InstrBB, DestBB, 0); - NewEdge1.setEdgeCount(CountValue); - NewEdge1.InMST = true; - getBBInfo(InstrBB).setBBInfoCount(CountValue); } ProfileCountSize = CountFromProfile.size(); CountPosition = I; diff --git a/test/Transforms/PGOProfile/PR41279.ll b/test/Transforms/PGOProfile/PR41279.ll new file mode 100644 index 00000000000..373563ba75f --- /dev/null +++ b/test/Transforms/PGOProfile/PR41279.ll @@ -0,0 +1,67 @@ +; Test that instrumentaiton works fine for the case of failing the split critical edges. +; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN +; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN + +declare void @f3({ i8*, i64 }*, { i8*, i64 }*, i64) +declare { i8*, i64 } @f0({ i8*, i64 }*) +declare i64 @f1() +declare void @invok2({ i8*, i64 }*, i8* noalias readonly align 1, i64) +declare void @invok1({ i8*, i64 }*, { i8*, i64 }*, i64) +declare i32 @__CxxFrameHandler3(...) + +define internal void @foo({ i8*, i64 }*, { i8*, i64 }*) personality i32 (...)* @__CxxFrameHandler3 { + %3 = alloca i8, align 1 + store i8 0, i8* %3, align 1 + %4 = call i64 @f1() + %5 = icmp ult i64 %4, 32 + br i1 %5, label %7, label %13 + +6: + cleanupret from %17 unwind to caller +; GEN: 6: +; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 2) + +7: + store i8 1, i8* %3, align 1 + %8 = call { i8*, i64 } @f0({ i8*, i64 }* %0) + %9 = extractvalue { i8*, i64 } %8, 0 + %10 = extractvalue { i8*, i64 } %8, 1 + invoke void @invok1({ i8*, i64 }* %1, { i8*, i64 }* %0, i64 1) + to label %11 unwind label %16 +; GEN: 7: +; GEN-NOT: call void @llvm.instrprof.increment + +11: + store i8 0, i8* %3, align 1 + invoke void @invok2({ i8*, i64 }* %1, i8* noalias readonly align 1 %9, i64 %10) + to label %12 unwind label %16 +; GEN: 11: +; GEN-NOT: call void @llvm.instrprof.increment + +12: + store i8 0, i8* %3, align 1 + br label %14 +; GEN: 12: +; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 1) + +13: + call void @f3({ i8*, i64 }* %0, { i8*, i64 }* %1, i64 1) + br label %14 +; GEN: 13: +; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 0) + +14: + ret void + +15: + store i8 0, i8* %3, align 1 + br label %6 +; GEN: 15: +; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 3) + +16: + %17 = cleanuppad within none [] + %18 = load i8, i8* %3, align 1 + %19 = trunc i8 %18 to i1 + br i1 %19, label %15, label %6 +}