namespace llvm {
+class BlockFrequencyInfo;
+class BranchProbabilityInfo;
class DominatorTree;
class Function;
class Instruction;
// and D1 is the D block body. We can then duplicate D0 as D0A and D0B, and
// create the following structure:
// A -> D0A, B -> D0A, I -> D0B, D0A -> D1, D0B -> D1
-bool SplitIndirectBrCriticalEdges(Function &F);
+// If BPI and BFI aren't non-null, BPI/BFI will be updated accordingly.
+bool SplitIndirectBrCriticalEdges(Function &F,
+ BranchProbabilityInfo *BPI = nullptr,
+ BlockFrequencyInfo *BFI = nullptr);
} // end namespace llvm
static void instrumentOneFunc(
Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI,
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
+ // Split indirectbr critical edges here before computing the MST rather than
+ // later in getInstrBB() to avoid invalidating it.
+ SplitIndirectBrCriticalEdges(F, BPI, BFI);
FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI,
BFI);
unsigned NumCounters = FuncInfo.getNumCounters();
continue;
auto *BPI = LookupBPI(F);
auto *BFI = LookupBFI(F);
+ // Split indirectbr critical edges here before computing the MST rather than
+ // later in getInstrBB() to avoid invalidating it.
+ SplitIndirectBrCriticalEdges(F, BPI, BFI);
PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI);
if (!Func.readCounters(PGOReader.get()))
continue;
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/CFG.h"
return IBB;
}
-bool llvm::SplitIndirectBrCriticalEdges(Function &F) {
+bool llvm::SplitIndirectBrCriticalEdges(Function &F,
+ BranchProbabilityInfo *BPI,
+ BlockFrequencyInfo *BFI) {
// Check whether the function has any indirectbrs, and collect which blocks
// they may jump to. Since most functions don't have indirect branches,
// this lowers the common case's overhead to O(Blocks) instead of O(Edges).
if (Targets.empty())
return false;
+ bool ShouldUpdateAnalysis = BPI && BFI;
bool Changed = false;
for (BasicBlock *Target : Targets) {
SmallVector<BasicBlock *, 16> OtherPreds;
continue;
BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split");
+ if (ShouldUpdateAnalysis) {
+ // Copy the BFI/BPI from Target to BodyBlock.
+ for (unsigned I = 0, E = BodyBlock->getTerminator()->getNumSuccessors();
+ I < E; ++I)
+ BPI->setEdgeProbability(BodyBlock, I,
+ BPI->getEdgeProbability(Target, I));
+ BFI->setBlockFreq(BodyBlock, BFI->getBlockFreq(Target).getFrequency());
+ }
// It's possible Target was its own successor through an indirectbr.
// In this case, the indirectbr now comes from BodyBlock.
if (IBRPred == Target)
ValueToValueMapTy VMap;
BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F);
+ BlockFrequency BlockFreqForDirectSucc;
for (BasicBlock *Pred : OtherPreds) {
// If the target is a loop to itself, then the terminator of the split
- // block needs to be updated.
- if (Pred == Target)
- BodyBlock->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
- else
- Pred->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
+ // block (BodyBlock) needs to be updated.
+ BasicBlock *Src = Pred != Target ? Pred : BodyBlock;
+ Src->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
+ if (ShouldUpdateAnalysis)
+ BlockFreqForDirectSucc += BFI->getBlockFreq(Src) *
+ BPI->getEdgeProbability(Src, DirectSucc);
+ }
+ if (ShouldUpdateAnalysis) {
+ BFI->setBlockFreq(DirectSucc, BlockFreqForDirectSucc.getFrequency());
+ BlockFrequency NewBlockFreqForTarget =
+ BFI->getBlockFreq(Target) - BlockFreqForDirectSucc;
+ BFI->setBlockFreq(Target, NewBlockFreqForTarget.getFrequency());
+ BPI->eraseBlock(Target);
}
// Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that
:ir
foo
# Func Hash:
-40197883220
+47485104005
# Num Counters:
4
# Counter Values:
-202
-88
+139
20
5
-
+63
; BRANCHPROB: Printing analysis 'Branch Probability Analysis' for function 'foo':
; BRANCHPROB:---- Branch Probabilities ----
; BRANCHPROB: edge entry -> if.then probability is 0x37c32b17 / 0x80000000 = 43.56%
-; BRANCHPROB: edge entry -> return probability is 0x483cd4e9 / 0x80000000 = 56.44%
+; BRANCHPROB: edge entry -> return.clone probability is 0x483cd4e9 / 0x80000000 = 56.44%
; BRANCHPROB: edge if.then -> return probability is 0x5ba2e8ba / 0x80000000 = 71.59%
; BRANCHPROB: edge if.then -> label2 probability is 0x1d1745d1 / 0x80000000 = 22.73%
; BRANCHPROB: edge if.then -> label3 probability is 0x0745d174 / 0x80000000 = 5.68%
-; BRANCHPROB: edge label2 -> return probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
-; BRANCHPROB: edge label3 -> return probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+; BRANCHPROB: edge label2 -> return.clone probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+; BRANCHPROB: edge label3 -> return.clone probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+; BRANCHPROB: edge return -> .split probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+; BRANCHPROB: edge return.clone -> .split probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
--- /dev/null
+; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define i32 @bar(i32 %v) local_unnamed_addr #0 {
+entry:
+ %mul = shl nsw i32 %v, 1
+ ret i32 %mul
+}
+
+; Function Attrs: norecurse nounwind readonly uwtable
+define i32 @foo(i8* nocapture readonly %p) #1 {
+entry:
+ %targets = alloca [256 x i8*], align 16
+ %arrayidx1 = getelementptr inbounds [256 x i8*], [256 x i8*]* %targets, i64 0, i64 93
+ store i8* blockaddress(@foo, %if.end), i8** %arrayidx1, align 8
+ br label %for.cond2
+
+for.cond2: ; preds = %if.end, %for.cond2, %entry
+; CHECK: for.cond2: ; preds = %.split1
+ %p.addr.0 = phi i8* [ %p, %entry ], [ %incdec.ptr5, %if.end ], [ %incdec.ptr, %for.cond2 ]
+ %incdec.ptr = getelementptr inbounds i8, i8* %p.addr.0, i64 1
+ %0 = load i8, i8* %p.addr.0, align 1
+ %cond = icmp eq i8 %0, 93
+ br i1 %cond, label %if.end.preheader, label %for.cond2
+
+if.end.preheader: ; preds = %for.cond2
+ br label %if.end
+
+if.end: ; preds = %if.end.preheader, %if.end
+; CHECK: if.end: ; preds = %.split1
+ %p.addr.1 = phi i8* [ %incdec.ptr5, %if.end ], [ %incdec.ptr, %if.end.preheader ]
+ %incdec.ptr5 = getelementptr inbounds i8, i8* %p.addr.1, i64 1
+ %1 = load i8, i8* %p.addr.1, align 1
+ %idxprom6 = zext i8 %1 to i64
+ %arrayidx7 = getelementptr inbounds [256 x i8*], [256 x i8*]* %targets, i64 0, i64 %idxprom6
+ %2 = load i8*, i8** %arrayidx7, align 8
+ indirectbr i8* %2, [label %for.cond2, label %if.end]
+; CHECK: indirectbr i8* %2, [label %for.cond2, label %if.end]
+}