From f9ab36ca631ede7e179d30f35f00ff55d6ca7766 Mon Sep 17 00:00:00 2001 From: Rong Xu Date: Mon, 10 Jun 2019 22:36:27 +0000 Subject: [PATCH] [PGO] Handle cases of non-instrument BBs As shown in PR41279, some basic blocks (such as catchswitch) cannot be instrumented. This patch filters out these BBs in PGO instrumentation. It also sets the profile count to the fail-to-instrument edge, so that we can propagate the counts in the CFG. Differential Revision: https://reviews.llvm.org/D62700 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362995 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Instrumentation/PGOInstrumentation.cpp | 123 ++++++++++++------ .../PGOProfile/Inputs/PR41279.proftext | 9 ++ .../PGOProfile/Inputs/PR41279_2.proftext | 7 + test/Transforms/PGOProfile/PR41279.ll | 27 +++- test/Transforms/PGOProfile/PR41279_2.ll | 68 ++++++++++ 5 files changed, 191 insertions(+), 43 deletions(-) create mode 100644 test/Transforms/PGOProfile/Inputs/PR41279.proftext create mode 100644 test/Transforms/PGOProfile/Inputs/PR41279_2.proftext create mode 100644 test/Transforms/PGOProfile/PR41279_2.ll diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 6996e7a0502..1bf80d63500 100644 --- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -544,6 +544,12 @@ struct BBInfo { const std::string infoString() const { return (Twine("Index=") + Twine(Index)).str(); } + + // Empty function -- only applicable to UseBBInfo. + void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} + + // Empty function -- only applicable to UseBBInfo. + void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} }; // This class implements the CFG edges. Note the CFG can be a multi-graph. @@ -748,7 +754,7 @@ void FuncPGOInstrumentation::renameComdatFunction() { } // Collect all the BBs that will be instruments and return them in -// InstrumentBBs. +// InstrumentBBs and setup InEdges/OutEdge for UseBBInfo. template void FuncPGOInstrumentation::getInstrumentBBs( std::vector &InstrumentBBs) { @@ -763,6 +769,18 @@ void FuncPGOInstrumentation::getInstrumentBBs( if (InstrBB) InstrumentBBs.push_back(InstrBB); } + + // Set up InEdges/OutEdges for all BBs. + for (auto &E : MST.AllEdges) { + if (E->Removed) + continue; + const BasicBlock *SrcBB = E->SrcBB; + const BasicBlock *DestBB = E->DestBB; + BBInfo &SrcInfo = getBBInfo(SrcBB); + BBInfo &DestInfo = getBBInfo(DestBB); + SrcInfo.addOutEdge(E.get()); + DestInfo.addInEdge(E.get()); + } } // Given a CFG E to be instrumented, find which BB to place the instrumented @@ -780,19 +798,25 @@ BasicBlock *FuncPGOInstrumentation::getInstrBB(Edge *E) { if (DestBB == nullptr) return SrcBB; + auto canInstrument = [this](BasicBlock *BB) -> BasicBlock * { + // There are basic blocks (such as catchswitch) cannot be instrumented. + // If the returned first insertion point is the end of BB, skip this BB. + if (BB->getFirstInsertionPt() == BB->end()) { + LLVM_DEBUG(dbgs() << "Cannot instrument BB index=" << getBBInfo(BB).Index + << "\n"); + return nullptr; + } + return BB; + }; + // Instrument the SrcBB if it has a single successor, // otherwise, the DestBB if this is not a critical edge. Instruction *TI = SrcBB->getTerminator(); if (TI->getNumSuccessors() <= 1) - return SrcBB; + return canInstrument(SrcBB); if (!E->IsCritical) - return DestBB; + return canInstrument(DestBB); - // For a critical edge, we have to split. Instrument the newly - // created BB. - IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; - LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index - << " --> " << getBBInfo(DestBB).Index << "\n"); unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum); if (!InstrBB) { @@ -800,6 +824,11 @@ BasicBlock *FuncPGOInstrumentation::getInstrBB(Edge *E) { dbgs() << "Fail to split critical edge: not instrument this edge.\n"); return nullptr; } + // For a critical edge, we have to split. Instrument the newly + // created BB. + IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; + LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index + << " --> " << getBBInfo(DestBB).Index << "\n"); // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. MST.addEdge(SrcBB, InstrBB, 0); // Second one: Add new edge of InstrBB->DestBB. @@ -807,7 +836,7 @@ BasicBlock *FuncPGOInstrumentation::getInstrBB(Edge *E) { NewEdge1.InMST = true; E->Removed = true; - return InstrBB; + return canInstrument(InstrBB); } // Visit all edge and instrument the edges not in MST, and do value profiling. @@ -925,6 +954,18 @@ struct UseBBInfo : public BBInfo { return BBInfo::infoString(); return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str(); } + + // Add an OutEdge and update the edge count. + void addOutEdge(PGOUseEdge *E) { + OutEdges.push_back(E); + UnknownCountOutEdge++; + } + + // Add an InEdge and update the edge count. + void addInEdge(PGOUseEdge *E) { + InEdges.push_back(E); + UnknownCountInEdge++; + } }; } // end anonymous namespace @@ -1069,24 +1110,50 @@ bool PGOUseFunc::setInstrumentedCounts( if (NumCounters != CountFromProfile.size()) { return false; } + // Set the profile count to the Instrumented BBs. uint32_t I = 0; for (BasicBlock *InstrBB : InstrumentBBs) { uint64_t CountValue = CountFromProfile[I++]; UseBBInfo &Info = getBBInfo(InstrBB); Info.setBBInfoCount(CountValue); - // If only one in-edge, the edge profile count should be the same as BB - // profile count. - if (Info.InEdges.size() == 1) { - Info.InEdges[0]->setEdgeCount(CountValue); - } + } + ProfileCountSize = CountFromProfile.size(); + CountPosition = I; + + // Set the edge count and update the count of unknown edges for BBs. + auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { + E->setEdgeCount(Value); + this->getBBInfo(E->SrcBB).UnknownCountOutEdge--; + this->getBBInfo(E->DestBB).UnknownCountInEdge--; + }; + + // Set the profile count the Instrumented edges. There are BBs that not in + // MST but not instrumented. Need to set the edge count value so that we can + // populate the profile counts later. + for (auto &E : FuncInfo.MST.AllEdges) { + if (E->Removed || E->InMST) + continue; + const BasicBlock *SrcBB = E->SrcBB; + UseBBInfo &SrcInfo = getBBInfo(SrcBB); + // If only one out-edge, the edge profile count should be the same as BB // profile count. - if (Info.OutEdges.size() == 1) { - Info.OutEdges[0]->setEdgeCount(CountValue); + if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1) + setEdgeCount(E.get(), SrcInfo.CountValue); + else { + const BasicBlock *DestBB = E->DestBB; + UseBBInfo &DestInfo = getBBInfo(DestBB); + // If only one in-edge, the edge profile count should be the same as BB + // profile count. + if (DestInfo.CountValid && DestInfo.InEdges.size() == 1) + setEdgeCount(E.get(), DestInfo.CountValue); } + if (E->CountValid) + continue; + // E's count should have been set from profile. If not, this meenas E skips + // the instrumentation. We set the count to 0. + setEdgeCount(E.get(), 0); } - ProfileCountSize = CountFromProfile.size(); - CountPosition = I; return true; } @@ -1180,26 +1247,6 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros) // Populate the counters from instrumented BBs to all BBs. // In the end of this operation, all BBs should have a valid count value. void PGOUseFunc::populateCounters() { - // First set up Count variable for all BBs. - for (auto &E : FuncInfo.MST.AllEdges) { - if (E->Removed) - continue; - - const BasicBlock *SrcBB = E->SrcBB; - const BasicBlock *DestBB = E->DestBB; - UseBBInfo &SrcInfo = getBBInfo(SrcBB); - UseBBInfo &DestInfo = getBBInfo(DestBB); - SrcInfo.OutEdges.push_back(E.get()); - DestInfo.InEdges.push_back(E.get()); - SrcInfo.UnknownCountOutEdge++; - DestInfo.UnknownCountInEdge++; - - if (!E->CountValid) - continue; - DestInfo.UnknownCountInEdge--; - SrcInfo.UnknownCountOutEdge--; - } - bool Changes = true; unsigned NumPasses = 0; while (Changes) { diff --git a/test/Transforms/PGOProfile/Inputs/PR41279.proftext b/test/Transforms/PGOProfile/Inputs/PR41279.proftext new file mode 100644 index 00000000000..58c3e89f089 --- /dev/null +++ b/test/Transforms/PGOProfile/Inputs/PR41279.proftext @@ -0,0 +1,9 @@ +:ir +foo +60927483247 +4 +3 +2 +3 +2 + diff --git a/test/Transforms/PGOProfile/Inputs/PR41279_2.proftext b/test/Transforms/PGOProfile/Inputs/PR41279_2.proftext new file mode 100644 index 00000000000..adc16bbad92 --- /dev/null +++ b/test/Transforms/PGOProfile/Inputs/PR41279_2.proftext @@ -0,0 +1,7 @@ +:ir +f +62077759478 +2 +3 +2 + diff --git a/test/Transforms/PGOProfile/PR41279.ll b/test/Transforms/PGOProfile/PR41279.ll index 373563ba75f..b150353dbaa 100644 --- a/test/Transforms/PGOProfile/PR41279.ll +++ b/test/Transforms/PGOProfile/PR41279.ll @@ -1,6 +1,9 @@ ; Test that instrumentaiton works fine for the case of failing the split critical edges. ; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN ; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN +; RUN: llvm-profdata merge %S/Inputs/PR41279.proftext -o %t.profdata +; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE +; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE declare void @f3({ i8*, i64 }*, { i8*, i64 }*, i64) declare { i8*, i64 } @f0({ i8*, i64 }*) @@ -9,17 +12,22 @@ declare void @invok2({ i8*, i64 }*, i8* noalias readonly align 1, i64) declare void @invok1({ i8*, i64 }*, { i8*, i64 }*, i64) declare i32 @__CxxFrameHandler3(...) -define internal void @foo({ i8*, i64 }*, { i8*, i64 }*) personality i32 (...)* @__CxxFrameHandler3 { +define void @foo({ i8*, i64 }*, { i8*, i64 }*) personality i32 (...)* @__CxxFrameHandler3 { +; USE-LABEL: @foo +; USE-SAME: !prof ![[FUNC_ENTRY_COUNT:[0-9]+]] + %3 = alloca i8, align 1 store i8 0, i8* %3, align 1 %4 = call i64 @f1() %5 = icmp ult i64 %4, 32 br i1 %5, label %7, label %13 +; USE: br i1 %5, label %7, label %13 +; USE-SAME: !prof ![[BW_ENTRY1:[0-9]+]] 6: cleanupret from %17 unwind to caller ; GEN: 6: -; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 2) +; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 60927483247, i32 4, i32 2) 7: store i8 1, i8* %3, align 1 @@ -42,13 +50,13 @@ define internal void @foo({ i8*, i64 }*, { i8*, i64 }*) personality i32 (...)* @ store i8 0, i8* %3, align 1 br label %14 ; GEN: 12: -; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 1) +; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 60927483247, i32 4, i32 1) 13: call void @f3({ i8*, i64 }* %0, { i8*, i64 }* %1, i64 1) br label %14 ; GEN: 13: -; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 0) +; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 60927483247, i32 4, i32 0) 14: ret void @@ -57,11 +65,20 @@ define internal void @foo({ i8*, i64 }*, { i8*, i64 }*) personality i32 (...)* @ store i8 0, i8* %3, align 1 br label %6 ; GEN: 15: -; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 3) +; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 60927483247, i32 4, i32 3) 16: %17 = cleanuppad within none [] %18 = load i8, i8* %3, align 1 %19 = trunc i8 %18 to i1 br i1 %19, label %15, label %6 +; USE: br i1 %19, label %15, label %6 +; USE-SAME: !prof ![[BW_ENTRY2:[0-9]+]] } + +; USE-DAG: {{![0-9]+}} = !{i32 1, !"ProfileSummary", {{![0-9]+}}} +; USE-DAG: {{![0-9]+}} = !{!"DetailedSummary", {{![0-9]+}}} +; USE-DAG: ![[FUNC_ENTRY_COUNT]] = !{!"function_entry_count", i64 8} +; USE_DAG: ![[BW_ENTRY1]] = !{!"branch_weights", i32 5, i32 3} +; USE_DAG: ![[BW_ENTRY2]] = !{!"branch_weights", i32 2, i32 1} + diff --git a/test/Transforms/PGOProfile/PR41279_2.ll b/test/Transforms/PGOProfile/PR41279_2.ll new file mode 100644 index 00000000000..8d5813fac42 --- /dev/null +++ b/test/Transforms/PGOProfile/PR41279_2.ll @@ -0,0 +1,68 @@ +; Test that instrumentaiton works fine for the case of catchswitch stmts. +; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN +; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN +; RUN: llvm-profdata merge %S/Inputs/PR41279_2.proftext -o %t.profdata +; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE +; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE + + +define dso_local void @f() personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) { +; USE-LABEL: @f +; USE-SAME: !prof ![[FUNC_ENTRY_COUNT:[0-9]+]] +; USE-DAG: {{![0-9]+}} = !{i32 1, !"ProfileSummary", {{![0-9]+}}} +; USE-DAG: {{![0-9]+}} = !{!"DetailedSummary", {{![0-9]+}}} +; USE-DAG: ![[FUNC_ENTRY_COUNT]] = !{!"function_entry_count", i64 5} +entry: + %__exception_code = alloca i32, align 4 + %__exception_code2 = alloca i32, align 4 + invoke void @f() #2 + to label %invoke.cont unwind label %catch.dispatch + +catch.dispatch: + %0 = catchswitch within none [label %__except] unwind to caller + +__except: + %1 = catchpad within %0 [i8* null] + catchret from %1 to label %__except1 + +__except1: + %2 = call i32 @llvm.eh.exceptioncode(token %1) + store i32 %2, i32* %__exception_code, align 4 + br label %__try.cont7 +;GEN: _except1: +;GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @__profn_f, i32 0, i32 0), i64 62077759478, i32 2, i32 1) + +invoke.cont: + br label %__try.cont + +__try.cont: + invoke void @f() + to label %invoke.cont3 unwind label %catch.dispatch4 + +catch.dispatch4: + %3 = catchswitch within none [label %__except5] unwind to caller +; GEN: catch.dispatch4: +; GEN-NOT: call void @llvm.instrprof.increment + +__except5: + %4 = catchpad within %3 [i8* null] + catchret from %4 to label %__except6 + +__except6: + %5 = call i32 @llvm.eh.exceptioncode(token %4) + store i32 %5, i32* %__exception_code2, align 4 + br label %__try.cont7 + +__try.cont7: + ret void + +invoke.cont3: + br label %__try.cont7 +;GEN: invoke.cont3: +;GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @__profn_f, i32 0, i32 0), i64 62077759478, i32 2, i32 0) + +} + +declare dso_local i32 @__C_specific_handler(...) + +declare i32 @llvm.eh.exceptioncode(token) -- 2.40.0