const std::string infoString() const {
return (Twine("Index=") + Twine(Index)).str();
}
+
+ // Empty function -- only applicable to UseBBInfo.
+ void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
+
+ // Empty function -- only applicable to UseBBInfo.
+ void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
};
// This class implements the CFG edges. Note the CFG can be a multi-graph.
}
// Collect all the BBs that will be instruments and return them in
-// InstrumentBBs.
+// InstrumentBBs and setup InEdges/OutEdge for UseBBInfo.
template <class Edge, class BBInfo>
void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
std::vector<BasicBlock *> &InstrumentBBs) {
if (InstrBB)
InstrumentBBs.push_back(InstrBB);
}
+
+ // Set up InEdges/OutEdges for all BBs.
+ for (auto &E : MST.AllEdges) {
+ if (E->Removed)
+ continue;
+ const BasicBlock *SrcBB = E->SrcBB;
+ const BasicBlock *DestBB = E->DestBB;
+ BBInfo &SrcInfo = getBBInfo(SrcBB);
+ BBInfo &DestInfo = getBBInfo(DestBB);
+ SrcInfo.addOutEdge(E.get());
+ DestInfo.addInEdge(E.get());
+ }
}
// Given a CFG E to be instrumented, find which BB to place the instrumented
if (DestBB == nullptr)
return SrcBB;
+ auto canInstrument = [this](BasicBlock *BB) -> BasicBlock * {
+ // There are basic blocks (such as catchswitch) cannot be instrumented.
+ // If the returned first insertion point is the end of BB, skip this BB.
+ if (BB->getFirstInsertionPt() == BB->end()) {
+ LLVM_DEBUG(dbgs() << "Cannot instrument BB index=" << getBBInfo(BB).Index
+ << "\n");
+ return nullptr;
+ }
+ return BB;
+ };
+
// Instrument the SrcBB if it has a single successor,
// otherwise, the DestBB if this is not a critical edge.
Instruction *TI = SrcBB->getTerminator();
if (TI->getNumSuccessors() <= 1)
- return SrcBB;
+ return canInstrument(SrcBB);
if (!E->IsCritical)
- return DestBB;
+ return canInstrument(DestBB);
- // For a critical edge, we have to split. Instrument the newly
- // created BB.
- IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
- LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
- << " --> " << getBBInfo(DestBB).Index << "\n");
unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
if (!InstrBB) {
dbgs() << "Fail to split critical edge: not instrument this edge.\n");
return nullptr;
}
+ // For a critical edge, we have to split. Instrument the newly
+ // created BB.
+ IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
+ LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
+ << " --> " << getBBInfo(DestBB).Index << "\n");
// Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
MST.addEdge(SrcBB, InstrBB, 0);
// Second one: Add new edge of InstrBB->DestBB.
NewEdge1.InMST = true;
E->Removed = true;
- return InstrBB;
+ return canInstrument(InstrBB);
}
// Visit all edge and instrument the edges not in MST, and do value profiling.
return BBInfo::infoString();
return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str();
}
+
+ // Add an OutEdge and update the edge count.
+ void addOutEdge(PGOUseEdge *E) {
+ OutEdges.push_back(E);
+ UnknownCountOutEdge++;
+ }
+
+ // Add an InEdge and update the edge count.
+ void addInEdge(PGOUseEdge *E) {
+ InEdges.push_back(E);
+ UnknownCountInEdge++;
+ }
};
} // end anonymous namespace
if (NumCounters != CountFromProfile.size()) {
return false;
}
+ // Set the profile count to the Instrumented BBs.
uint32_t I = 0;
for (BasicBlock *InstrBB : InstrumentBBs) {
uint64_t CountValue = CountFromProfile[I++];
UseBBInfo &Info = getBBInfo(InstrBB);
Info.setBBInfoCount(CountValue);
- // If only one in-edge, the edge profile count should be the same as BB
- // profile count.
- if (Info.InEdges.size() == 1) {
- Info.InEdges[0]->setEdgeCount(CountValue);
- }
+ }
+ ProfileCountSize = CountFromProfile.size();
+ CountPosition = I;
+
+ // Set the edge count and update the count of unknown edges for BBs.
+ auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
+ E->setEdgeCount(Value);
+ this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
+ this->getBBInfo(E->DestBB).UnknownCountInEdge--;
+ };
+
+ // Set the profile count the Instrumented edges. There are BBs that not in
+ // MST but not instrumented. Need to set the edge count value so that we can
+ // populate the profile counts later.
+ for (auto &E : FuncInfo.MST.AllEdges) {
+ if (E->Removed || E->InMST)
+ continue;
+ const BasicBlock *SrcBB = E->SrcBB;
+ UseBBInfo &SrcInfo = getBBInfo(SrcBB);
+
// If only one out-edge, the edge profile count should be the same as BB
// profile count.
- if (Info.OutEdges.size() == 1) {
- Info.OutEdges[0]->setEdgeCount(CountValue);
+ if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1)
+ setEdgeCount(E.get(), SrcInfo.CountValue);
+ else {
+ const BasicBlock *DestBB = E->DestBB;
+ UseBBInfo &DestInfo = getBBInfo(DestBB);
+ // If only one in-edge, the edge profile count should be the same as BB
+ // profile count.
+ if (DestInfo.CountValid && DestInfo.InEdges.size() == 1)
+ setEdgeCount(E.get(), DestInfo.CountValue);
}
+ if (E->CountValid)
+ continue;
+ // E's count should have been set from profile. If not, this meenas E skips
+ // the instrumentation. We set the count to 0.
+ setEdgeCount(E.get(), 0);
}
- ProfileCountSize = CountFromProfile.size();
- CountPosition = I;
return true;
}
// Populate the counters from instrumented BBs to all BBs.
// In the end of this operation, all BBs should have a valid count value.
void PGOUseFunc::populateCounters() {
- // First set up Count variable for all BBs.
- for (auto &E : FuncInfo.MST.AllEdges) {
- if (E->Removed)
- continue;
-
- const BasicBlock *SrcBB = E->SrcBB;
- const BasicBlock *DestBB = E->DestBB;
- UseBBInfo &SrcInfo = getBBInfo(SrcBB);
- UseBBInfo &DestInfo = getBBInfo(DestBB);
- SrcInfo.OutEdges.push_back(E.get());
- DestInfo.InEdges.push_back(E.get());
- SrcInfo.UnknownCountOutEdge++;
- DestInfo.UnknownCountInEdge++;
-
- if (!E->CountValid)
- continue;
- DestInfo.UnknownCountInEdge--;
- SrcInfo.UnknownCountOutEdge--;
- }
-
bool Changes = true;
unsigned NumPasses = 0;
while (Changes) {
--- /dev/null
+:ir
+foo
+60927483247
+4
+3
+2
+3
+2
+
--- /dev/null
+:ir
+f
+62077759478
+2
+3
+2
+
; Test that instrumentaiton works fine for the case of failing the split critical edges.
; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: llvm-profdata merge %S/Inputs/PR41279.proftext -o %t.profdata
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
declare void @f3({ i8*, i64 }*, { i8*, i64 }*, i64)
declare { i8*, i64 } @f0({ i8*, i64 }*)
declare void @invok1({ i8*, i64 }*, { i8*, i64 }*, i64)
declare i32 @__CxxFrameHandler3(...)
-define internal void @foo({ i8*, i64 }*, { i8*, i64 }*) personality i32 (...)* @__CxxFrameHandler3 {
+define void @foo({ i8*, i64 }*, { i8*, i64 }*) personality i32 (...)* @__CxxFrameHandler3 {
+; USE-LABEL: @foo
+; USE-SAME: !prof ![[FUNC_ENTRY_COUNT:[0-9]+]]
+
%3 = alloca i8, align 1
store i8 0, i8* %3, align 1
%4 = call i64 @f1()
%5 = icmp ult i64 %4, 32
br i1 %5, label %7, label %13
+; USE: br i1 %5, label %7, label %13
+; USE-SAME: !prof ![[BW_ENTRY1:[0-9]+]]
6:
cleanupret from %17 unwind to caller
; GEN: 6:
-; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 2)
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 60927483247, i32 4, i32 2)
7:
store i8 1, i8* %3, align 1
store i8 0, i8* %3, align 1
br label %14
; GEN: 12:
-; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 1)
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 60927483247, i32 4, i32 1)
13:
call void @f3({ i8*, i64 }* %0, { i8*, i64 }* %1, i64 1)
br label %14
; GEN: 13:
-; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 0)
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 60927483247, i32 4, i32 0)
14:
ret void
store i8 0, i8* %3, align 1
br label %6
; GEN: 15:
-; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 3)
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 60927483247, i32 4, i32 3)
16:
%17 = cleanuppad within none []
%18 = load i8, i8* %3, align 1
%19 = trunc i8 %18 to i1
br i1 %19, label %15, label %6
+; USE: br i1 %19, label %15, label %6
+; USE-SAME: !prof ![[BW_ENTRY2:[0-9]+]]
}
+
+; USE-DAG: {{![0-9]+}} = !{i32 1, !"ProfileSummary", {{![0-9]+}}}
+; USE-DAG: {{![0-9]+}} = !{!"DetailedSummary", {{![0-9]+}}}
+; USE-DAG: ![[FUNC_ENTRY_COUNT]] = !{!"function_entry_count", i64 8}
+; USE_DAG: ![[BW_ENTRY1]] = !{!"branch_weights", i32 5, i32 3}
+; USE_DAG: ![[BW_ENTRY2]] = !{!"branch_weights", i32 2, i32 1}
+
--- /dev/null
+; Test that instrumentaiton works fine for the case of catchswitch stmts.
+; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: llvm-profdata merge %S/Inputs/PR41279_2.proftext -o %t.profdata
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+
+
+define dso_local void @f() personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) {
+; USE-LABEL: @f
+; USE-SAME: !prof ![[FUNC_ENTRY_COUNT:[0-9]+]]
+; USE-DAG: {{![0-9]+}} = !{i32 1, !"ProfileSummary", {{![0-9]+}}}
+; USE-DAG: {{![0-9]+}} = !{!"DetailedSummary", {{![0-9]+}}}
+; USE-DAG: ![[FUNC_ENTRY_COUNT]] = !{!"function_entry_count", i64 5}
+entry:
+ %__exception_code = alloca i32, align 4
+ %__exception_code2 = alloca i32, align 4
+ invoke void @f() #2
+ to label %invoke.cont unwind label %catch.dispatch
+
+catch.dispatch:
+ %0 = catchswitch within none [label %__except] unwind to caller
+
+__except:
+ %1 = catchpad within %0 [i8* null]
+ catchret from %1 to label %__except1
+
+__except1:
+ %2 = call i32 @llvm.eh.exceptioncode(token %1)
+ store i32 %2, i32* %__exception_code, align 4
+ br label %__try.cont7
+;GEN: _except1:
+;GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @__profn_f, i32 0, i32 0), i64 62077759478, i32 2, i32 1)
+
+invoke.cont:
+ br label %__try.cont
+
+__try.cont:
+ invoke void @f()
+ to label %invoke.cont3 unwind label %catch.dispatch4
+
+catch.dispatch4:
+ %3 = catchswitch within none [label %__except5] unwind to caller
+; GEN: catch.dispatch4:
+; GEN-NOT: call void @llvm.instrprof.increment
+
+__except5:
+ %4 = catchpad within %3 [i8* null]
+ catchret from %4 to label %__except6
+
+__except6:
+ %5 = call i32 @llvm.eh.exceptioncode(token %4)
+ store i32 %5, i32* %__exception_code2, align 4
+ br label %__try.cont7
+
+__try.cont7:
+ ret void
+
+invoke.cont3:
+ br label %__try.cont7
+;GEN: invoke.cont3:
+;GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @__profn_f, i32 0, i32 0), i64 62077759478, i32 2, i32 0)
+
+}
+
+declare dso_local i32 @__C_specific_handler(...)
+
+declare i32 @llvm.eh.exceptioncode(token)