From e0e019f6a2d76e140221d9c10159a444839e2563 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Mon, 6 Jan 2014 22:27:43 +0000 Subject: [PATCH] CodeGen: Initial instrumentation based PGO implementation git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@198640 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CGCUDARuntime.cpp | 3 +- lib/CodeGen/CGCall.cpp | 1 + lib/CodeGen/CGException.cpp | 6 + lib/CodeGen/CGExpr.cpp | 14 +- lib/CodeGen/CGExprAgg.cpp | 8 +- lib/CodeGen/CGExprComplex.cpp | 8 +- lib/CodeGen/CGExprScalar.cpp | 37 +- lib/CodeGen/CGObjC.cpp | 9 +- lib/CodeGen/CGStmt.cpp | 227 +++++++-- lib/CodeGen/CMakeLists.txt | 1 + lib/CodeGen/CodeGenFunction.cpp | 92 +++- lib/CodeGen/CodeGenFunction.h | 28 +- lib/CodeGen/CodeGenModule.cpp | 11 +- lib/CodeGen/CodeGenModule.h | 8 +- lib/CodeGen/CodeGenPGO.cpp | 456 +++++++++++++++++++ lib/CodeGen/CodeGenPGO.h | 216 +++++++++ test/CodeGen/Inputs/instr-profile.pgodata | 127 ++++++ test/CodeGen/instr-profile.c | 428 +++++++++++++++++ test/CodeGenCXX/Inputs/instr-profile.pgodata | 16 + test/CodeGenCXX/instr-profile.cpp | 73 +++ 20 files changed, 1710 insertions(+), 59 deletions(-) create mode 100644 lib/CodeGen/CodeGenPGO.cpp create mode 100644 lib/CodeGen/CodeGenPGO.h create mode 100644 test/CodeGen/Inputs/instr-profile.pgodata create mode 100644 test/CodeGen/instr-profile.c create mode 100644 test/CodeGenCXX/Inputs/instr-profile.pgodata create mode 100644 test/CodeGenCXX/instr-profile.cpp diff --git a/lib/CodeGen/CGCUDARuntime.cpp b/lib/CodeGen/CGCUDARuntime.cpp index eaf31bb6f6..54a28f51bd 100644 --- a/lib/CodeGen/CGCUDARuntime.cpp +++ b/lib/CodeGen/CGCUDARuntime.cpp @@ -31,7 +31,8 @@ RValue CGCUDARuntime::EmitCUDAKernelCallExpr(CodeGenFunction &CGF, llvm::BasicBlock *ContBlock = CGF.createBasicBlock("kcall.end"); CodeGenFunction::ConditionalEvaluation eval(CGF); - CGF.EmitBranchOnBoolExpr(E->getConfig(), ContBlock, ConfigOKBlock); + CGF.EmitBranchOnBoolExpr(E->getConfig(), ContBlock, ConfigOKBlock, + /*TrueCount=*/0); eval.begin(CGF); CGF.EmitBlock(ConfigOKBlock); diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp index 8cc61e99bf..4bdd032a2b 100644 --- a/lib/CodeGen/CGCall.cpp +++ b/lib/CodeGen/CGCall.cpp @@ -2184,6 +2184,7 @@ void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee, call->setCallingConv(getRuntimeCC()); Builder.CreateUnreachable(); } + PGO.setCurrentRegionCount(0); } /// Emits a call or invoke instruction to the given nullary runtime diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp index 39a992aab1..26e061d41a 100644 --- a/lib/CodeGen/CGException.cpp +++ b/lib/CodeGen/CGException.cpp @@ -1294,6 +1294,10 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { // Initialize the catch variable and set up the cleanups. BeginCatch(*this, C); + // Emit the PGO counter increment + RegionCounter CatchCnt = getPGORegionCounter(C); + CatchCnt.beginRegion(Builder); + // Perform the body of the catch. EmitStmt(C->getHandlerBlock()); @@ -1320,7 +1324,9 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { Builder.CreateBr(ContBB); } + RegionCounter ContCnt = getPGORegionCounter(&S); EmitBlock(ContBB); + ContCnt.beginRegion(Builder); } namespace { diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index 92966d0eb1..9c108e9375 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -2651,6 +2651,7 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) { } OpaqueValueMapping binding(*this, expr); + RegionCounter Cnt = getPGORegionCounter(expr); const Expr *condExpr = expr->getCond(); bool CondExprBool; @@ -2658,8 +2659,12 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) { const Expr *live = expr->getTrueExpr(), *dead = expr->getFalseExpr(); if (!CondExprBool) std::swap(live, dead); - if (!ContainsLabel(dead)) + if (!ContainsLabel(dead)) { + // If the true case is live, we need to track its region + if (CondExprBool) + Cnt.beginRegion(Builder); return EmitLValue(live); + } } llvm::BasicBlock *lhsBlock = createBasicBlock("cond.true"); @@ -2667,13 +2672,15 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) { llvm::BasicBlock *contBlock = createBasicBlock("cond.end"); ConditionalEvaluation eval(*this); - EmitBranchOnBoolExpr(condExpr, lhsBlock, rhsBlock); + EmitBranchOnBoolExpr(condExpr, lhsBlock, rhsBlock, Cnt.getCount()); // Any temporaries created here are conditional. EmitBlock(lhsBlock); + Cnt.beginRegion(Builder); eval.begin(*this); LValue lhs = EmitLValue(expr->getTrueExpr()); eval.end(*this); + Cnt.adjustFallThroughCount(); if (!lhs.isSimple()) return EmitUnsupportedLValue(expr, "conditional operator"); @@ -2683,14 +2690,17 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) { // Any temporaries created here are conditional. EmitBlock(rhsBlock); + Cnt.beginElseRegion(); eval.begin(*this); LValue rhs = EmitLValue(expr->getFalseExpr()); eval.end(*this); + Cnt.adjustFallThroughCount(); if (!rhs.isSimple()) return EmitUnsupportedLValue(expr, "conditional operator"); rhsBlock = Builder.GetInsertBlock(); EmitBlock(contBlock); + Cnt.applyAdjustmentsToRegion(); llvm::PHINode *phi = Builder.CreatePHI(lhs.getAddress()->getType(), 2, "cond-lvalue"); diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp index 35e6988e46..1a6274e828 100644 --- a/lib/CodeGen/CGExprAgg.cpp +++ b/lib/CodeGen/CGExprAgg.cpp @@ -892,15 +892,18 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { // Bind the common expression if necessary. CodeGenFunction::OpaqueValueMapping binding(CGF, E); + RegionCounter Cnt = CGF.getPGORegionCounter(E); CodeGenFunction::ConditionalEvaluation eval(CGF); - CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock); + CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock, Cnt.getCount()); // Save whether the destination's lifetime is externally managed. bool isExternallyDestructed = Dest.isExternallyDestructed(); eval.begin(CGF); CGF.EmitBlock(LHSBlock); + Cnt.beginRegion(Builder); Visit(E->getTrueExpr()); + Cnt.adjustFallThroughCount(); eval.end(CGF); assert(CGF.HaveInsertPoint() && "expression evaluation ended with no IP!"); @@ -914,10 +917,13 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { eval.begin(CGF); CGF.EmitBlock(RHSBlock); + Cnt.beginElseRegion(); Visit(E->getFalseExpr()); + Cnt.adjustFallThroughCount(); eval.end(CGF); CGF.EmitBlock(ContBlock); + Cnt.applyAdjustmentsToRegion(); } void AggExprEmitter::VisitChooseExpr(const ChooseExpr *CE) { diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp index ded4bb8617..145d94620e 100644 --- a/lib/CodeGen/CGExprComplex.cpp +++ b/lib/CodeGen/CGExprComplex.cpp @@ -752,22 +752,28 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { // Bind the common expression if necessary. CodeGenFunction::OpaqueValueMapping binding(CGF, E); + RegionCounter Cnt = CGF.getPGORegionCounter(E); CodeGenFunction::ConditionalEvaluation eval(CGF); - CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock); + CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock, Cnt.getCount()); eval.begin(CGF); CGF.EmitBlock(LHSBlock); + Cnt.beginRegion(Builder); ComplexPairTy LHS = Visit(E->getTrueExpr()); + Cnt.adjustFallThroughCount(); LHSBlock = Builder.GetInsertBlock(); CGF.EmitBranch(ContBlock); eval.end(CGF); eval.begin(CGF); CGF.EmitBlock(RHSBlock); + Cnt.beginElseRegion(); ComplexPairTy RHS = Visit(E->getFalseExpr()); + Cnt.adjustFallThroughCount(); RHSBlock = Builder.GetInsertBlock(); CGF.EmitBlock(ContBlock); eval.end(CGF); + Cnt.applyAdjustmentsToRegion(); // Create a PHI node for the real part. llvm::PHINode *RealPN = Builder.CreatePHI(LHS.first->getType(), 2, "cond.r"); diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp index 15abc06b62..a38c69494c 100644 --- a/lib/CodeGen/CGExprScalar.cpp +++ b/lib/CodeGen/CGExprScalar.cpp @@ -2874,8 +2874,12 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { } Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) { + RegionCounter Cnt = CGF.getPGORegionCounter(E); + // Perform vector logical and on comparisons with zero vectors. if (E->getType()->isVectorType()) { + Cnt.beginRegion(Builder); + Value *LHS = Visit(E->getLHS()); Value *RHS = Visit(E->getRHS()); Value *Zero = llvm::ConstantAggregateZero::get(LHS->getType()); @@ -2897,6 +2901,8 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) { bool LHSCondVal; if (CGF.ConstantFoldsToSimpleInteger(E->getLHS(), LHSCondVal)) { if (LHSCondVal) { // If we have 1 && X, just emit X. + Cnt.beginRegion(Builder); + Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS()); // ZExt result to int or bool. return Builder.CreateZExtOrBitCast(RHSCond, ResTy, "land.ext"); @@ -2913,7 +2919,7 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) { CodeGenFunction::ConditionalEvaluation eval(CGF); // Branch on the LHS first. If it is false, go to the failure (cont) block. - CGF.EmitBranchOnBoolExpr(E->getLHS(), RHSBlock, ContBlock); + CGF.EmitBranchOnBoolExpr(E->getLHS(), RHSBlock, ContBlock, Cnt.getCount()); // Any edges into the ContBlock are now from an (indeterminate number of) // edges from this first condition. All of these values will be false. Start @@ -2926,7 +2932,9 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) { eval.begin(CGF); CGF.EmitBlock(RHSBlock); + Cnt.beginRegion(Builder); Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS()); + Cnt.adjustFallThroughCount(); eval.end(CGF); // Reaquire the RHS block, as there may be subblocks inserted. @@ -2939,14 +2947,19 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) { Builder.SetCurrentDebugLocation(llvm::DebugLoc()); CGF.EmitBlock(ContBlock); PN->addIncoming(RHSCond, RHSBlock); + Cnt.applyAdjustmentsToRegion(); // ZExt result to int. return Builder.CreateZExtOrBitCast(PN, ResTy, "land.ext"); } Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) { + RegionCounter Cnt = CGF.getPGORegionCounter(E); + // Perform vector logical or on comparisons with zero vectors. if (E->getType()->isVectorType()) { + Cnt.beginRegion(Builder); + Value *LHS = Visit(E->getLHS()); Value *RHS = Visit(E->getRHS()); Value *Zero = llvm::ConstantAggregateZero::get(LHS->getType()); @@ -2968,6 +2981,8 @@ Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) { bool LHSCondVal; if (CGF.ConstantFoldsToSimpleInteger(E->getLHS(), LHSCondVal)) { if (!LHSCondVal) { // If we have 0 || X, just emit X. + Cnt.beginRegion(Builder); + Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS()); // ZExt result to int or bool. return Builder.CreateZExtOrBitCast(RHSCond, ResTy, "lor.ext"); @@ -2984,7 +2999,8 @@ Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) { CodeGenFunction::ConditionalEvaluation eval(CGF); // Branch on the LHS first. If it is true, go to the success (cont) block. - CGF.EmitBranchOnBoolExpr(E->getLHS(), ContBlock, RHSBlock); + CGF.EmitBranchOnBoolExpr(E->getLHS(), ContBlock, RHSBlock, + Cnt.getParentCount() - Cnt.getCount()); // Any edges into the ContBlock are now from an (indeterminate number of) // edges from this first condition. All of these values will be true. Start @@ -2999,7 +3015,9 @@ Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) { // Emit the RHS condition as a bool value. CGF.EmitBlock(RHSBlock); + Cnt.beginRegion(Builder); Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS()); + Cnt.adjustFallThroughCount(); eval.end(CGF); @@ -3010,6 +3028,7 @@ Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) { // into the phi node for the edge with the value of RHSCond. CGF.EmitBlock(ContBlock); PN->addIncoming(RHSCond, RHSBlock); + Cnt.applyAdjustmentsToRegion(); // ZExt result to int. return Builder.CreateZExtOrBitCast(PN, ResTy, "lor.ext"); @@ -3049,6 +3068,7 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { // Bind the common expression if necessary. CodeGenFunction::OpaqueValueMapping binding(CGF, E); + RegionCounter Cnt = CGF.getPGORegionCounter(E); Expr *condExpr = E->getCond(); Expr *lhsExpr = E->getTrueExpr(); @@ -3063,6 +3083,8 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { // If the dead side doesn't have labels we need, just emit the Live part. if (!CGF.ContainsLabel(dead)) { + if (CondExprBool) + Cnt.beginRegion(Builder); Value *Result = Visit(live); // If the live part is a throw expression, it acts like it has a void @@ -3079,6 +3101,8 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { // the select function. if (CGF.getLangOpts().OpenCL && condExpr->getType()->isVectorType()) { + Cnt.beginRegion(Builder); + llvm::Value *CondV = CGF.EmitScalarExpr(condExpr); llvm::Value *LHS = Visit(lhsExpr); llvm::Value *RHS = Visit(rhsExpr); @@ -3122,6 +3146,8 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { // safe to evaluate the LHS and RHS unconditionally. if (isCheapEnoughToEvaluateUnconditionally(lhsExpr, CGF) && isCheapEnoughToEvaluateUnconditionally(rhsExpr, CGF)) { + Cnt.beginRegion(Builder); + llvm::Value *CondV = CGF.EvaluateExprAsBool(condExpr); llvm::Value *LHS = Visit(lhsExpr); llvm::Value *RHS = Visit(rhsExpr); @@ -3138,23 +3164,28 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { llvm::BasicBlock *ContBlock = CGF.createBasicBlock("cond.end"); CodeGenFunction::ConditionalEvaluation eval(CGF); - CGF.EmitBranchOnBoolExpr(condExpr, LHSBlock, RHSBlock); + CGF.EmitBranchOnBoolExpr(condExpr, LHSBlock, RHSBlock, Cnt.getCount()); CGF.EmitBlock(LHSBlock); + Cnt.beginRegion(Builder); eval.begin(CGF); Value *LHS = Visit(lhsExpr); eval.end(CGF); + Cnt.adjustFallThroughCount(); LHSBlock = Builder.GetInsertBlock(); Builder.CreateBr(ContBlock); CGF.EmitBlock(RHSBlock); + Cnt.beginElseRegion(); eval.begin(CGF); Value *RHS = Visit(rhsExpr); eval.end(CGF); + Cnt.adjustFallThroughCount(); RHSBlock = Builder.GetInsertBlock(); CGF.EmitBlock(ContBlock); + Cnt.applyAdjustmentsToRegion(); // If the LHS or RHS is a throw expression, it will be legitimately null. if (!LHS) diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp index 35447166dc..d48a6a3019 100644 --- a/lib/CodeGen/CGObjC.cpp +++ b/lib/CodeGen/CGObjC.cpp @@ -1523,10 +1523,13 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ llvm::Value *initialMutations = Builder.CreateLoad(StateMutationsPtr, "forcoll.initial-mutations"); + RegionCounter Cnt = getPGORegionCounter(&S); + // Start looping. This is the point we return to whenever we have a // fresh, non-empty batch of objects. llvm::BasicBlock *LoopBodyBB = createBasicBlock("forcoll.loopbody"); EmitBlock(LoopBodyBB); + Cnt.beginRegion(Builder); // The current index into the buffer. llvm::PHINode *index = Builder.CreatePHI(UnsignedLongLTy, 3, "forcoll.index"); @@ -1623,7 +1626,7 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ EmitAutoVarCleanups(variable); // Perform the loop body, setting up break and continue labels. - BreakContinueStack.push_back(BreakContinue(LoopEnd, AfterBody)); + BreakContinueStack.push_back(BreakContinue(LoopEnd, AfterBody, &Cnt)); { RunCleanupsScope Scope(*this); EmitStmt(S.getBody()); @@ -1642,6 +1645,7 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ llvm::Value *indexPlusOne = Builder.CreateAdd(index, llvm::ConstantInt::get(UnsignedLongLTy, 1)); + // TODO: We should probably model this as a "continue" for PGO // If we haven't overrun the buffer yet, we can continue. Builder.CreateCondBr(Builder.CreateICmpULT(indexPlusOne, count), LoopBodyBB, FetchMoreBB); @@ -1665,6 +1669,8 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ index->addIncoming(zero, Builder.GetInsertBlock()); count->addIncoming(refetchCount, Builder.GetInsertBlock()); + // TODO: We should be applying PGO weights here, but this needs to handle the + // branch before FetchMoreBB or we risk getting the numbers wrong. Builder.CreateCondBr(Builder.CreateICmpEQ(refetchCount, zero), EmptyBB, LoopBodyBB); @@ -1687,6 +1693,7 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ PopCleanupBlock(); EmitBlock(LoopEnd.getBlock()); + // TODO: Once we calculate PGO weights above, set the region count here } void CodeGenFunction::EmitObjCAtTryStmt(const ObjCAtTryStmt &S) { diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp index 6224839ff7..5031d60cbb 100644 --- a/lib/CodeGen/CGStmt.cpp +++ b/lib/CodeGen/CGStmt.cpp @@ -358,7 +358,9 @@ void CodeGenFunction::EmitLabel(const LabelDecl *D) { ResolveBranchFixups(Dest.getBlock()); } + RegionCounter Cnt = getPGORegionCounter(D->getStmt()); EmitBlock(Dest.getBlock()); + Cnt.beginRegion(Builder); } /// Change the cleanup scope of the labels in this lexical scope to @@ -402,12 +404,14 @@ void CodeGenFunction::EmitGotoStmt(const GotoStmt &S) { EmitStopPoint(&S); EmitBranchThroughCleanup(getJumpDestForLabel(S.getLabel())); + PGO.setCurrentRegionCount(0); } void CodeGenFunction::EmitIndirectGotoStmt(const IndirectGotoStmt &S) { if (const LabelDecl *Target = S.getConstantTarget()) { EmitBranchThroughCleanup(getJumpDestForLabel(Target)); + PGO.setCurrentRegionCount(0); return; } @@ -424,12 +428,14 @@ void CodeGenFunction::EmitIndirectGotoStmt(const IndirectGotoStmt &S) { cast(IndGotoBB->begin())->addIncoming(V, CurBB); EmitBranch(IndGotoBB); + PGO.setCurrentRegionCount(0); } void CodeGenFunction::EmitIfStmt(const IfStmt &S) { // C99 6.8.4.1: The first substatement is executed if the expression compares // unequal to 0. The condition must be a scalar type. LexicalScope ConditionScope(*this, S.getSourceRange()); + RegionCounter Cnt = getPGORegionCounter(&S); if (S.getConditionVariable()) EmitAutoVarDecl(*S.getConditionVariable()); @@ -447,6 +453,8 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) { // If the skipped block has no labels in it, just emit the executed block. // This avoids emitting dead code and simplifies the CFG substantially. if (!ContainsLabel(Skipped)) { + if (CondConstant) + Cnt.beginRegion(Builder); if (Executed) { RunCleanupsScope ExecutedScope(*this); EmitStmt(Executed); @@ -462,14 +470,17 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) { llvm::BasicBlock *ElseBlock = ContBlock; if (S.getElse()) ElseBlock = createBasicBlock("if.else"); - EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock); + + EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock, Cnt.getCount()); // Emit the 'then' code. - EmitBlock(ThenBlock); + EmitBlock(ThenBlock); + Cnt.beginRegion(Builder); { RunCleanupsScope ThenScope(*this); EmitStmt(S.getThen()); } + Cnt.adjustFallThroughCount(); EmitBranch(ContBlock); // Emit the 'else' code if present. @@ -478,10 +489,12 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) { if (getDebugInfo()) Builder.SetCurrentDebugLocation(llvm::DebugLoc()); EmitBlock(ElseBlock); + Cnt.beginElseRegion(); { RunCleanupsScope ElseScope(*this); EmitStmt(Else); } + Cnt.adjustFallThroughCount(); // There is no need to emit line number for unconditional branch. if (getDebugInfo()) Builder.SetCurrentDebugLocation(llvm::DebugLoc()); @@ -490,9 +503,12 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) { // Emit the continuation block for code after the if. EmitBlock(ContBlock, true); + Cnt.applyAdjustmentsToRegion(); } void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) { + RegionCounter Cnt = getPGORegionCounter(&S); + // Emit the header for the loop, which will also become // the continue target. JumpDest LoopHeader = getJumpDestInCurrentScope("while.cond"); @@ -503,7 +519,7 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) { JumpDest LoopExit = getJumpDestInCurrentScope("while.end"); // Store the blocks to use for break and continue. - BreakContinueStack.push_back(BreakContinue(LoopExit, LoopHeader)); + BreakContinueStack.push_back(BreakContinue(LoopExit, LoopHeader, &Cnt)); // C++ [stmt.while]p2: // When the condition of a while statement is a declaration, the @@ -525,6 +541,7 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) { // while(1) is common, avoid extra exit blocks. Be sure // to correctly handle break/continue though. bool EmitBoolCondBranch = true; + llvm::BranchInst *CondBr = NULL; if (llvm::ConstantInt *C = dyn_cast(BoolCondVal)) if (C->isOne()) EmitBoolCondBranch = false; @@ -536,7 +553,7 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) { if (ConditionScope.requiresCleanups()) ExitBlock = createBasicBlock("while.exit"); - Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); + CondBr = Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); if (ExitBlock != LoopExit.getBlock()) { EmitBlock(ExitBlock); @@ -549,11 +566,19 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) { { RunCleanupsScope BodyScope(*this); EmitBlock(LoopBody); + Cnt.beginRegion(Builder); EmitStmt(S.getBody()); } + Cnt.adjustFallThroughCount(); BreakContinueStack.pop_back(); + uint64_t LoopCount = Cnt.getCount(); + uint64_t ExitCount = Cnt.getLoopExitCount(); + if (EmitBoolCondBranch) + CondBr->setMetadata(llvm::LLVMContext::MD_prof, + PGO.createBranchWeights(LoopCount, ExitCount)); + // Immediately force cleanup. ConditionScope.ForceCleanup(); @@ -562,6 +587,7 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) { // Emit the exit block. EmitBlock(LoopExit.getBlock(), true); + PGO.setCurrentRegionCount(ExitCount + Cnt.getBreakCounter().getCount()); // The LoopHeader typically is just a branch if we skipped emitting // a branch, try to erase it. @@ -573,16 +599,20 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S) { JumpDest LoopExit = getJumpDestInCurrentScope("do.end"); JumpDest LoopCond = getJumpDestInCurrentScope("do.cond"); + RegionCounter Cnt = getPGORegionCounter(&S); + // Store the blocks to use for break and continue. - BreakContinueStack.push_back(BreakContinue(LoopExit, LoopCond)); + BreakContinueStack.push_back(BreakContinue(LoopExit, LoopCond, &Cnt)); // Emit the body of the loop. llvm::BasicBlock *LoopBody = createBasicBlock("do.body"); EmitBlock(LoopBody); + Cnt.beginRegion(Builder); { RunCleanupsScope BodyScope(*this); EmitStmt(S.getBody()); } + Cnt.adjustFallThroughCount(); BreakContinueStack.pop_back(); @@ -603,12 +633,18 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S) { if (C->isZero()) EmitBoolCondBranch = false; + uint64_t LoopCount = Cnt.getCount() - Cnt.getParentCount(); + uint64_t ExitCount = Cnt.getLoopExitCount(); + // As long as the condition is true, iterate the loop. - if (EmitBoolCondBranch) - Builder.CreateCondBr(BoolCondVal, LoopBody, LoopExit.getBlock()); + if (EmitBoolCondBranch) { + Builder.CreateCondBr(BoolCondVal, LoopBody, LoopExit.getBlock(), + PGO.createBranchWeights(LoopCount, ExitCount)); + } // Emit the exit block. EmitBlock(LoopExit.getBlock()); + PGO.setCurrentRegionCount(ExitCount + Cnt.getBreakCounter().getCount()); // The DoCond block typically is just a branch if we skipped // emitting a branch, try to erase it. @@ -617,6 +653,8 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S) { } void CodeGenFunction::EmitForStmt(const ForStmt &S) { + RegionCounter Cnt = getPGORegionCounter(&S); + JumpDest LoopExit = getJumpDestInCurrentScope("for.end"); RunCleanupsScope ForScope(*this); @@ -639,6 +677,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S) { // Create a cleanup scope for the condition variable cleanups. RunCleanupsScope ConditionScope(*this); + llvm::BranchInst *CondBr = NULL; if (S.getCond()) { // If the for statement has a condition scope, emit the local variable // declaration. @@ -658,7 +697,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S) { // C99 6.8.5p2/p4: The first substatement is executed if the expression // compares unequal to 0. The condition must be a scalar type. llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond()); - Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock); + CondBr = Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock); if (ExitBlock != LoopExit.getBlock()) { EmitBlock(ExitBlock); @@ -670,6 +709,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S) { // Treat it as a non-zero constant. Don't even create a new block for the // body, just fall into it. } + Cnt.beginRegion(Builder); // If the for loop doesn't have an increment we can just use the // condition as the continue block. Otherwise we'll need to create @@ -679,7 +719,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S) { Continue = getJumpDestInCurrentScope("for.inc"); // Store the blocks to use for break and continue. - BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); + BreakContinueStack.push_back(BreakContinue(LoopExit, Continue, &Cnt)); { // Create a separate cleanup scope for the body, in case it is not @@ -693,9 +733,16 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S) { EmitBlock(Continue.getBlock()); EmitStmt(S.getInc()); } + Cnt.adjustFallThroughCount(); BreakContinueStack.pop_back(); + uint64_t LoopCount = Cnt.getCount(); + uint64_t ExitCount = Cnt.getLoopExitCount(); + if (S.getCond()) + CondBr->setMetadata(llvm::LLVMContext::MD_prof, + PGO.createBranchWeights(LoopCount, ExitCount)); + ConditionScope.ForceCleanup(); EmitBranch(CondBlock); @@ -706,9 +753,12 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S) { // Emit the fall-through block. EmitBlock(LoopExit.getBlock(), true); + PGO.setCurrentRegionCount(ExitCount + Cnt.getBreakCounter().getCount()); } void CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S) { + RegionCounter Cnt = getPGORegionCounter(&S); + JumpDest LoopExit = getJumpDestInCurrentScope("for.end"); RunCleanupsScope ForScope(*this); @@ -739,7 +789,8 @@ void CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S) { // The body is executed if the expression, contextually converted // to bool, is true. llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond()); - Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock); + llvm::BranchInst *CondBr = Builder.CreateCondBr(BoolCondVal, + ForBody, ExitBlock); if (ExitBlock != LoopExit.getBlock()) { EmitBlock(ExitBlock); @@ -747,12 +798,13 @@ void CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S) { } EmitBlock(ForBody); + Cnt.beginRegion(Builder); // Create a block for the increment. In case of a 'continue', we jump there. JumpDest Continue = getJumpDestInCurrentScope("for.inc"); // Store the blocks to use for break and continue. - BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); + BreakContinueStack.push_back(BreakContinue(LoopExit, Continue, &Cnt)); { // Create a separate cleanup scope for the loop variable and body. @@ -764,9 +816,15 @@ void CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S) { // If there is an increment, emit it next. EmitBlock(Continue.getBlock()); EmitStmt(S.getInc()); + Cnt.adjustFallThroughCount(); BreakContinueStack.pop_back(); + uint64_t LoopCount = Cnt.getCount(); + uint64_t ExitCount = Cnt.getLoopExitCount(); + CondBr->setMetadata(llvm::LLVMContext::MD_prof, + PGO.createBranchWeights(LoopCount, ExitCount)); + EmitBranch(CondBlock); ForScope.ForceCleanup(); @@ -776,6 +834,7 @@ void CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S) { // Emit the fall-through block. EmitBlock(LoopExit.getBlock(), true); + PGO.setCurrentRegionCount(ExitCount + Cnt.getBreakCounter().getCount()); } void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) { @@ -789,6 +848,7 @@ void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) { /*init*/ true); } EmitBranchThroughCleanup(ReturnBlock); + PGO.setCurrentRegionCount(0); } /// EmitReturnStmt - Note that due to GCC extensions, this can have an operand @@ -860,6 +920,7 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { cleanupScope.ForceCleanup(); EmitBranchThroughCleanup(ReturnBlock); + PGO.setCurrentRegionCount(0); } void CodeGenFunction::EmitDeclStmt(const DeclStmt &S) { @@ -882,8 +943,14 @@ void CodeGenFunction::EmitBreakStmt(const BreakStmt &S) { if (HaveInsertPoint()) EmitStopPoint(&S); - JumpDest Block = BreakContinueStack.back().BreakBlock; - EmitBranchThroughCleanup(Block); + BreakContinue &BC = BreakContinueStack.back(); + // We keep track of breaks from the loop so we can differentiate them from + // non-local exits in PGO instrumentation. This only applies to loops, not + // breaks from switch statements. + if (BC.CountBreak) + BC.LoopCnt->getBreakCounter().beginRegion(Builder); + EmitBranchThroughCleanup(BC.BreakBlock); + PGO.setCurrentRegionCount(0); } void CodeGenFunction::EmitContinueStmt(const ContinueStmt &S) { @@ -895,8 +962,12 @@ void CodeGenFunction::EmitContinueStmt(const ContinueStmt &S) { if (HaveInsertPoint()) EmitStopPoint(&S); - JumpDest Block = BreakContinueStack.back().ContinueBlock; - EmitBranchThroughCleanup(Block); + BreakContinue &BC = BreakContinueStack.back(); + // We keep track of continues in the loop so we can differentiate them from + // non-local exits in PGO instrumentation. + BC.LoopCnt->getContinueCounter().beginRegion(Builder); + EmitBranchThroughCleanup(BC.ContinueBlock); + PGO.setCurrentRegionCount(0); } /// EmitCaseStmtRange - If case statement range is not too big then @@ -908,11 +979,14 @@ void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S) { llvm::APSInt LHS = S.getLHS()->EvaluateKnownConstInt(getContext()); llvm::APSInt RHS = S.getRHS()->EvaluateKnownConstInt(getContext()); + RegionCounter CaseCnt = getPGORegionCounter(&S); + // Emit the code for this case. We do this first to make sure it is // properly chained from our predecessor before generating the // switch machinery to enter this block. EmitBlock(createBasicBlock("sw.bb")); llvm::BasicBlock *CaseDest = Builder.GetInsertBlock(); + CaseCnt.beginRegion(Builder); EmitStmt(S.getSubStmt()); // If range is empty, do nothing. @@ -923,7 +997,17 @@ void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S) { // FIXME: parameters such as this should not be hardcoded. if (Range.ult(llvm::APInt(Range.getBitWidth(), 64))) { // Range is small enough to add multiple switch instruction cases. - for (unsigned i = 0, e = Range.getZExtValue() + 1; i != e; ++i) { + uint64_t Total = CaseCnt.getCount() - CaseCnt.getParentCount(); + unsigned NCases = Range.getZExtValue() + 1; + // Divide the weights evenly between the cases, ensuring that the total + // weight is preserved. Ie, a weight of 5 over three cases will be + // distributed as weights of 2, 2, and 1. + uint64_t Weight = Total / NCases, Rem = Total % NCases; + for (unsigned I = 0; I != NCases; ++I) { + if (SwitchWeights) + SwitchWeights->push_back(Weight + (Rem ? 1 : 0)); + if (Rem) + Rem--; SwitchInsn->addCase(Builder.getInt(LHS), CaseDest); LHS++; } @@ -948,7 +1032,19 @@ void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S) { Builder.CreateSub(SwitchInsn->getCondition(), Builder.getInt(LHS)); llvm::Value *Cond = Builder.CreateICmpULE(Diff, Builder.getInt(Range), "inbounds"); - Builder.CreateCondBr(Cond, CaseDest, FalseDest); + + llvm::MDNode *Weights = 0; + if (SwitchWeights) { + uint64_t ThisCount = CaseCnt.getCount() - CaseCnt.getParentCount(); + uint64_t DefaultCount = (*SwitchWeights)[0]; + Weights = PGO.createBranchWeights(ThisCount, DefaultCount); + + // Since we're chaining the switch default through each large case range, we + // need to update the weight for the default, ie, the first case, to include + // this case. + (*SwitchWeights)[0] += ThisCount; + } + Builder.CreateCondBr(Cond, CaseDest, FalseDest, Weights); // Restore the appropriate insertion point. if (RestoreBB) @@ -974,17 +1070,22 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) { return; } + RegionCounter CaseCnt = getPGORegionCounter(&S); llvm::ConstantInt *CaseVal = Builder.getInt(S.getLHS()->EvaluateKnownConstInt(getContext())); - // If the body of the case is just a 'break', and if there was no fallthrough, - // try to not emit an empty block. - if ((CGM.getCodeGenOpts().OptimizationLevel > 0) && + // If the body of the case is just a 'break', try to not emit an empty block. + // If we're profiling or we're not optimizing, leave the block in for better + // debug and coverage analysis. + if (!CGM.getCodeGenOpts().ProfileInstrGenerate && + CGM.getCodeGenOpts().OptimizationLevel > 0 && isa(S.getSubStmt())) { JumpDest Block = BreakContinueStack.back().BreakBlock; // Only do this optimization if there are no cleanups that need emitting. if (isObviouslyBranchWithoutCleanups(Block)) { + if (SwitchWeights) + SwitchWeights->push_back(CaseCnt.getCount() - CaseCnt.getParentCount()); SwitchInsn->addCase(CaseVal, Block.getBlock()); // If there was a fallthrough into this case, make sure to redirect it to @@ -999,6 +1100,9 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) { EmitBlock(createBasicBlock("sw.bb")); llvm::BasicBlock *CaseDest = Builder.GetInsertBlock(); + if (SwitchWeights) + SwitchWeights->push_back(CaseCnt.getCount() - CaseCnt.getParentCount()); + CaseCnt.beginRegion(Builder); SwitchInsn->addCase(CaseVal, CaseDest); // Recursively emitting the statement is acceptable, but is not wonderful for @@ -1016,8 +1120,14 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) { // Otherwise, iteratively add consecutive cases to this switch stmt. while (NextCase && NextCase->getRHS() == 0) { CurCase = NextCase; - llvm::ConstantInt *CaseVal = + llvm::ConstantInt *CaseVal = Builder.getInt(CurCase->getLHS()->EvaluateKnownConstInt(getContext())); + + CaseCnt = getPGORegionCounter(NextCase); + if (SwitchWeights) + SwitchWeights->push_back(CaseCnt.getCount() - CaseCnt.getParentCount()); + CaseCnt.beginRegion(Builder); + SwitchInsn->addCase(CaseVal, CaseDest); NextCase = dyn_cast(CurCase->getSubStmt()); } @@ -1030,7 +1140,22 @@ void CodeGenFunction::EmitDefaultStmt(const DefaultStmt &S) { llvm::BasicBlock *DefaultBlock = SwitchInsn->getDefaultDest(); assert(DefaultBlock->empty() && "EmitDefaultStmt: Default block already defined?"); + + llvm::BasicBlock *SkipCountBB = 0; + if (CGM.getCodeGenOpts().ProfileInstrGenerate) { + // The PGO region here needs to count the number of times the edge occurs, + // so fallthrough into this case will jump past the region counter to the + // skipcount basic block. + SkipCountBB = createBasicBlock("skipcount"); + EmitBranch(SkipCountBB); + } EmitBlock(DefaultBlock); + + RegionCounter Cnt = getPGORegionCounter(&S); + Cnt.beginRegion(Builder, /*AddIncomingFallThrough=*/true); + + if (SkipCountBB) + EmitBlock(SkipCountBB); EmitStmt(S.getSubStmt()); } @@ -1187,7 +1312,8 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S, static bool FindCaseStatementsForValue(const SwitchStmt &S, const llvm::APSInt &ConstantCondValue, SmallVectorImpl &ResultStmts, - ASTContext &C) { + ASTContext &C, + const SwitchCase *&ResultCase) { // First step, find the switch case that is being branched to. We can do this // efficiently by scanning the SwitchCase list. const SwitchCase *Case = S.getSwitchCaseList(); @@ -1230,6 +1356,7 @@ static bool FindCaseStatementsForValue(const SwitchStmt &S, // while (1) { // case 4: ... bool FoundCase = false; + ResultCase = Case; return CollectStatementsForCase(S.getBody(), Case, FoundCase, ResultStmts) != CSFC_Failure && FoundCase; @@ -1245,6 +1372,7 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) { // Handle nested switch statements. llvm::SwitchInst *SavedSwitchInsn = SwitchInsn; + SmallVector *SavedSwitchWeights = SwitchWeights; llvm::BasicBlock *SavedCRBlock = CaseRangeBlock; // See if we can constant fold the condition of the switch and therefore only @@ -1252,8 +1380,14 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) { llvm::APSInt ConstantCondValue; if (ConstantFoldsToSimpleInteger(S.getCond(), ConstantCondValue)) { SmallVector CaseStmts; + const SwitchCase *Case = 0; if (FindCaseStatementsForValue(S, ConstantCondValue, CaseStmts, - getContext())) { + getContext(), Case)) { + PGO.setCurrentRegionCount(0); + if (Case) { + RegionCounter CaseCnt = getPGORegionCounter(Case); + CaseCnt.beginRegion(Builder); + } RunCleanupsScope ExecutedScope(*this); // At this point, we are no longer "within" a switch instance, so @@ -1265,6 +1399,8 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) { // specified series of statements and we're good. for (unsigned i = 0, e = CaseStmts.size(); i != e; ++i) EmitStmt(CaseStmts[i]); + RegionCounter ExitCnt = getPGORegionCounter(&S); + ExitCnt.beginRegion(Builder); // Now we want to restore the saved switch instance so that nested // switches continue to function properly @@ -1282,18 +1418,41 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) { // failure. llvm::BasicBlock *DefaultBlock = createBasicBlock("sw.default"); SwitchInsn = Builder.CreateSwitch(CondV, DefaultBlock); + if (PGO.haveRegionCounts()) { + // Walk the SwitchCase list to find how many there are. + uint64_t DefaultCount = 0; + unsigned NumCases = 0; + for (const SwitchCase *Case = S.getSwitchCaseList(); + Case; + Case = Case->getNextSwitchCase()) { + if (isa(Case)) + DefaultCount = getPGORegionCounter(Case).getCount(); + NumCases += 1; + } + SwitchWeights = new SmallVector(); + SwitchWeights->reserve(NumCases); + // The default needs to be first. We store the edge count, so we already + // know the right weight. + SwitchWeights->push_back(DefaultCount); + } CaseRangeBlock = DefaultBlock; // Clear the insertion point to indicate we are in unreachable code. Builder.ClearInsertionPoint(); + PGO.setCurrentRegionCount(0); // All break statements jump to NextBlock. If BreakContinueStack is non-empty - // then reuse last ContinueBlock. + // then reuse last ContinueBlock and that block's counter. JumpDest OuterContinue; - if (!BreakContinueStack.empty()) - OuterContinue = BreakContinueStack.back().ContinueBlock; + RegionCounter *OuterCount = 0; + if (!BreakContinueStack.empty()) { + BreakContinue &BC = BreakContinueStack.back(); + OuterContinue = BC.ContinueBlock; + OuterCount = BC.LoopCnt; + } - BreakContinueStack.push_back(BreakContinue(SwitchExit, OuterContinue)); + BreakContinueStack.push_back(BreakContinue(SwitchExit, OuterContinue, + OuterCount, /*CountBreak=*/false)); // Emit switch body. EmitStmt(S.getBody()); @@ -1322,8 +1481,20 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) { // Emit continuation. EmitBlock(SwitchExit.getBlock(), true); - + RegionCounter ExitCnt = getPGORegionCounter(&S); + ExitCnt.beginRegion(Builder); + + if (SwitchWeights) { + assert(SwitchWeights->size() == 1 + SwitchInsn->getNumCases() && + "switch weights do not match switch cases"); + // If there's only one jump destination there's no sense weighting it. + if (SwitchWeights->size() > 1) + SwitchInsn->setMetadata(llvm::LLVMContext::MD_prof, + PGO.createBranchWeights(*SwitchWeights)); + delete SwitchWeights; + } SwitchInsn = SavedSwitchInsn; + SwitchWeights = SavedSwitchWeights; CaseRangeBlock = SavedCRBlock; } diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 2bac3485d7..f5e54019dd 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -53,6 +53,7 @@ add_clang_library(clangCodeGen CodeGenModule.cpp CodeGenTBAA.cpp CodeGenTypes.cpp + CodeGenPGO.cpp ItaniumCXXABI.cpp MicrosoftCXXABI.cpp ModuleBuilder.cpp diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp index 2d8c68e5f8..fb668e4a16 100644 --- a/lib/CodeGen/CodeGenFunction.cpp +++ b/lib/CodeGen/CodeGenFunction.cpp @@ -16,6 +16,7 @@ #include "CGCXXABI.h" #include "CGDebugInfo.h" #include "CodeGenModule.h" +#include "CodeGenPGO.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" @@ -44,7 +45,8 @@ CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext) NextCleanupDestIndex(1), FirstBlockInfo(0), EHResumeBlock(0), ExceptionSlot(0), EHSelectorSlot(0), DebugInfo(CGM.getModuleDebugInfo()), DisableDebugInfo(false), DidCallStackSave(false), IndirectBranch(0), - SwitchInsn(0), CaseRangeBlock(0), UnreachableBlock(0), NumReturnExprs(0), + PGO(cgm), SwitchInsn(0), SwitchWeights(0), + CaseRangeBlock(0), UnreachableBlock(0), NumReturnExprs(0), NumSimpleReturnExprs(0), CXXABIThisDecl(0), CXXABIThisValue(0), CXXThisValue(0), CXXDefaultInitExprThis(0), CXXStructorImplicitParamDecl(0), CXXStructorImplicitParamValue(0), @@ -571,6 +573,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, if (CGM.getCodeGenOpts().InstrumentForProfiling) EmitMCountInstrumentation(); + PGO.assignRegionCounters(GD); + if (RetTy->isVoidType()) { // Void type; nothing to return. ReturnValue = 0; @@ -643,6 +647,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, void CodeGenFunction::EmitFunctionBody(FunctionArgList &Args, const Stmt *Body) { + RegionCounter Cnt = getPGORegionCounter(Body); + Cnt.beginRegion(Builder); if (const CompoundStmt *S = dyn_cast(Body)) EmitCompoundStmtWithoutScope(*S); else @@ -772,6 +778,9 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, // a quick pass now to see if we can. if (!CurFn->doesNotThrow()) TryMarkNoThrow(CurFn); + + PGO.emitWriteoutFunction(CurGD); + PGO.destroyRegionCounters(); } /// ContainsLabel - Return true if the statement contains a label in it. If @@ -870,10 +879,13 @@ ConstantFoldsToSimpleInteger(const Expr *Cond, llvm::APSInt &ResultInt) { /// void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, - llvm::BasicBlock *FalseBlock) { + llvm::BasicBlock *FalseBlock, + uint64_t TrueCount) { Cond = Cond->IgnoreParens(); if (const BinaryOperator *CondBOp = dyn_cast(Cond)) { + RegionCounter Cnt = getPGORegionCounter(CondBOp); + // Handle X && Y in a condition. if (CondBOp->getOpcode() == BO_LAnd) { // If we have "1 && X", simplify the code. "0 && X" would have constant @@ -882,7 +894,9 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, if (ConstantFoldsToSimpleInteger(CondBOp->getLHS(), ConstantBool) && ConstantBool) { // br(1 && X) -> br(X). - return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock); + Cnt.beginRegion(Builder); + return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, + TrueCount); } // If we have "X && 1", simplify the code to use an uncond branch. @@ -890,21 +904,28 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, if (ConstantFoldsToSimpleInteger(CondBOp->getRHS(), ConstantBool) && ConstantBool) { // br(X && 1) -> br(X). - return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock); + return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock, + TrueCount); } // Emit the LHS as a conditional. If the LHS conditional is false, we // want to jump to the FalseBlock. llvm::BasicBlock *LHSTrue = createBasicBlock("land.lhs.true"); + // The counter tells us how often we evaluate RHS, and all of TrueCount + // can be propagated to that branch. + uint64_t RHSCount = Cnt.getCount(); ConditionalEvaluation eval(*this); - EmitBranchOnBoolExpr(CondBOp->getLHS(), LHSTrue, FalseBlock); + EmitBranchOnBoolExpr(CondBOp->getLHS(), LHSTrue, FalseBlock, RHSCount); EmitBlock(LHSTrue); // Any temporaries created here are conditional. + Cnt.beginRegion(Builder); eval.begin(*this); - EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock); + EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, TrueCount); eval.end(*this); + Cnt.adjustFallThroughCount(); + Cnt.applyAdjustmentsToRegion(); return; } @@ -916,7 +937,9 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, if (ConstantFoldsToSimpleInteger(CondBOp->getLHS(), ConstantBool) && !ConstantBool) { // br(0 || X) -> br(X). - return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock); + Cnt.beginRegion(Builder); + return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, + TrueCount); } // If we have "X || 0", simplify the code to use an uncond branch. @@ -924,21 +947,31 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, if (ConstantFoldsToSimpleInteger(CondBOp->getRHS(), ConstantBool) && !ConstantBool) { // br(X || 0) -> br(X). - return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock); + return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock, + TrueCount); } // Emit the LHS as a conditional. If the LHS conditional is true, we // want to jump to the TrueBlock. llvm::BasicBlock *LHSFalse = createBasicBlock("lor.lhs.false"); + // We have the count for entry to the RHS and for the whole expression + // being true, so we can divy up True count between the short circuit and + // the RHS. + uint64_t LHSCount = TrueCount - Cnt.getCount(); + uint64_t RHSCount = TrueCount - LHSCount; ConditionalEvaluation eval(*this); - EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, LHSFalse); + EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, LHSFalse, LHSCount); EmitBlock(LHSFalse); // Any temporaries created here are conditional. + Cnt.beginRegion(Builder); eval.begin(*this); - EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock); + EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, RHSCount); + eval.end(*this); + Cnt.adjustFallThroughCount(); + Cnt.applyAdjustmentsToRegion(); return; } @@ -946,8 +979,13 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, if (const UnaryOperator *CondUOp = dyn_cast(Cond)) { // br(!x, t, f) -> br(x, f, t) - if (CondUOp->getOpcode() == UO_LNot) - return EmitBranchOnBoolExpr(CondUOp->getSubExpr(), FalseBlock, TrueBlock); + if (CondUOp->getOpcode() == UO_LNot) { + // Negate the count. + uint64_t FalseCount = PGO.getCurrentRegionCount() - TrueCount; + // Negate the condition and swap the destination blocks. + return EmitBranchOnBoolExpr(CondUOp->getSubExpr(), FalseBlock, TrueBlock, + FalseCount); + } } if (const ConditionalOperator *CondOp = dyn_cast(Cond)) { @@ -955,17 +993,33 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *LHSBlock = createBasicBlock("cond.true"); llvm::BasicBlock *RHSBlock = createBasicBlock("cond.false"); + RegionCounter Cnt = getPGORegionCounter(CondOp); ConditionalEvaluation cond(*this); - EmitBranchOnBoolExpr(CondOp->getCond(), LHSBlock, RHSBlock); + EmitBranchOnBoolExpr(CondOp->getCond(), LHSBlock, RHSBlock, Cnt.getCount()); + + // When computing PGO branch weights, we only know the overall count for + // the true block. This code is essentially doing tail duplication of the + // naive code-gen, introducing new edges for which counts are not + // available. Divide the counts proportionally between the LHS and RHS of + // the conditional operator. + uint64_t LHSScaledTrueCount = 0; + if (TrueCount) { + double LHSRatio = Cnt.getCount() / (double) PGO.getCurrentRegionCount(); + LHSScaledTrueCount = TrueCount * LHSRatio; + } cond.begin(*this); EmitBlock(LHSBlock); - EmitBranchOnBoolExpr(CondOp->getLHS(), TrueBlock, FalseBlock); + Cnt.beginRegion(Builder); + EmitBranchOnBoolExpr(CondOp->getLHS(), TrueBlock, FalseBlock, + LHSScaledTrueCount); cond.end(*this); cond.begin(*this); EmitBlock(RHSBlock); - EmitBranchOnBoolExpr(CondOp->getRHS(), TrueBlock, FalseBlock); + Cnt.beginElseRegion(); + EmitBranchOnBoolExpr(CondOp->getRHS(), TrueBlock, FalseBlock, + TrueCount - LHSScaledTrueCount); cond.end(*this); return; @@ -981,9 +1035,15 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, return; } + // Create branch weights based on the number of times we get here and the + // number of times the condition should be true. + uint64_t CurrentCount = PGO.getCurrentRegionCountWithMin(TrueCount); + llvm::MDNode *Weights = PGO.createBranchWeights(TrueCount, + CurrentCount - TrueCount); + // Emit the code with the fully general case. llvm::Value *CondV = EvaluateExprAsBool(Cond); - Builder.CreateCondBr(CondV, TrueBlock, FalseBlock); + Builder.CreateCondBr(CondV, TrueBlock, FalseBlock, Weights); } /// ErrorUnsupported - Print out an error that codegen doesn't support the diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index 1dd5dab110..b06f81b417 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -19,6 +19,7 @@ #include "CGValue.h" #include "EHScopeStack.h" #include "CodeGenModule.h" +#include "CodeGenPGO.h" #include "clang/AST/CharUnits.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" @@ -817,19 +818,36 @@ private: llvm::DenseMap LabelMap; // BreakContinueStack - This keeps track of where break and continue - // statements should jump to. + // statements should jump to and the associated base counter for + // instrumentation. struct BreakContinue { - BreakContinue(JumpDest Break, JumpDest Continue) - : BreakBlock(Break), ContinueBlock(Continue) {} + BreakContinue(JumpDest Break, JumpDest Continue, RegionCounter *LoopCnt, + bool CountBreak = true) + : BreakBlock(Break), ContinueBlock(Continue), LoopCnt(LoopCnt), + CountBreak(CountBreak) {} JumpDest BreakBlock; JumpDest ContinueBlock; + RegionCounter *LoopCnt; + bool CountBreak; }; SmallVector BreakContinueStack; + CodeGenPGO PGO; + +public: + /// Get a counter for instrumentation of the region associated with the given + /// statement. + RegionCounter getPGORegionCounter(const Stmt *S) { + return RegionCounter(PGO, S); + } +private: + /// SwitchInsn - This is nearest current switch instruction. It is null if /// current context is not in a switch. llvm::SwitchInst *SwitchInsn; + /// The branch weights of SwitchInsn when doing instrumentation based PGO. + SmallVector *SwitchWeights; /// CaseRangeBlock - This block holds if condition check for last case /// statement range in current switch instruction. @@ -2413,8 +2431,10 @@ public: /// EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g. for an /// if statement) to the specified blocks. Based on the condition, this might /// try to simplify the codegen of the conditional based on the branch. + /// TrueCount should be the number of times we expect the condition to + /// evaluate to true based on PGO data. void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, - llvm::BasicBlock *FalseBlock); + llvm::BasicBlock *FalseBlock, uint64_t TrueCount); /// \brief Emit a description of a type in a format suitable for passing to /// a runtime sanitizer handler. diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index 7baef23b11..b482677f62 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -20,6 +20,7 @@ #include "CGOpenCLRuntime.h" #include "CodeGenFunction.h" #include "CodeGenTBAA.h" +#include "CodeGenPGO.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/CharUnits.h" @@ -77,7 +78,8 @@ CodeGenModule::CodeGenModule(ASTContext &C, const CodeGenOptions &CGO, ABI(createCXXABI(*this)), VMContext(M.getContext()), TBAA(0), TheTargetCodeGenInfo(0), Types(*this), VTables(*this), ObjCRuntime(0), OpenCLRuntime(0), CUDARuntime(0), DebugInfo(0), ARCData(0), - NoObjCARCExceptionsMetadata(0), RRData(0), CFConstantStringClassRef(0), + NoObjCARCExceptionsMetadata(0), RRData(0), PGOData(0), + CFConstantStringClassRef(0), ConstantStringClassRef(0), NSConstantStringType(0), NSConcreteGlobalBlock(0), NSConcreteStackBlock(0), BlockObjectAssign(0), BlockObjectDispose(0), BlockDescriptorType(0), GenericBlockLiteralType(0), @@ -131,6 +133,9 @@ CodeGenModule::CodeGenModule(ASTContext &C, const CodeGenOptions &CGO, if (C.getLangOpts().ObjCAutoRefCount) ARCData = new ARCEntrypoints(); RRData = new RREntrypoints(); + + if (!CodeGenOpts.InstrProfileInput.empty()) + PGOData = new PGOProfileData(*this, CodeGenOpts.InstrProfileInput); } CodeGenModule::~CodeGenModule() { @@ -2181,6 +2186,10 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD, AddGlobalDtor(Fn, DA->getPriority()); if (D->hasAttr()) AddGlobalAnnotations(D, Fn); + + llvm::Function *PGOInit = CodeGenPGO::emitInitialization(*this); + if (PGOInit) + AddGlobalCtor(PGOInit, 0); } void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h index 5f26ab968c..9dad092ec6 100644 --- a/lib/CodeGen/CodeGenModule.h +++ b/lib/CodeGen/CodeGenModule.h @@ -85,7 +85,8 @@ namespace CodeGen { class CGCUDARuntime; class BlockFieldFlags; class FunctionArgList; - + class PGOProfileData; + struct OrderGlobalInits { unsigned int priority; unsigned int lex_order; @@ -258,6 +259,7 @@ class CodeGenModule : public CodeGenTypeCache { ARCEntrypoints *ARCData; llvm::MDNode *NoObjCARCExceptionsMetadata; RREntrypoints *RRData; + PGOProfileData *PGOData; // WeakRefReferences - A set of references that have only been seen via // a weakref so far. This is used to remove the weak of the reference if we @@ -479,6 +481,10 @@ public: return *RRData; } + PGOProfileData *getPGOData() const { + return PGOData; + } + llvm::Constant *getStaticLocalDeclAddress(const VarDecl *D) { return StaticLocalDeclMap[D]; } diff --git a/lib/CodeGen/CodeGenPGO.cpp b/lib/CodeGen/CodeGenPGO.cpp new file mode 100644 index 0000000000..d1b57d80c3 --- /dev/null +++ b/lib/CodeGen/CodeGenPGO.cpp @@ -0,0 +1,456 @@ +//===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Instrumentation-based profile-guided optimization +// +//===----------------------------------------------------------------------===// + +#include "CodeGenPGO.h" +#include "CodeGenFunction.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/StmtVisitor.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/Support/FileSystem.h" + +using namespace clang; +using namespace CodeGen; + +static void ReportBadPGOData(CodeGenModule &CGM, const char *Message) { + DiagnosticsEngine &Diags = CGM.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, Message); + Diags.Report(DiagID); +} + +PGOProfileData::PGOProfileData(CodeGenModule &CGM, std::string Path) + : CGM(CGM) { + if (llvm::MemoryBuffer::getFile(Path, DataBuffer)) { + ReportBadPGOData(CGM, "failed to open pgo data file"); + return; + } + + if (DataBuffer->getBufferSize() > std::numeric_limits::max()) { + ReportBadPGOData(CGM, "pgo data file too big"); + return; + } + + // Scan through the data file and map each function to the corresponding + // file offset where its counts are stored. + const char *BufferStart = DataBuffer->getBufferStart(); + const char *BufferEnd = DataBuffer->getBufferEnd(); + const char *CurPtr = BufferStart; + while (CurPtr < BufferEnd) { + // Read the mangled function name. + const char *FuncName = CurPtr; + // FIXME: Something will need to be added to distinguish static functions. + CurPtr = strchr(CurPtr, ' '); + if (!CurPtr) { + ReportBadPGOData(CGM, "pgo data file has malformed function entry"); + return; + } + StringRef MangledName(FuncName, CurPtr - FuncName); + + // Read the number of counters. + char *EndPtr; + unsigned NumCounters = strtol(++CurPtr, &EndPtr, 10); + if (EndPtr == CurPtr || *EndPtr != '\n' || NumCounters <= 0) { + ReportBadPGOData(CGM, "pgo data file has unexpected number of counters"); + return; + } + CurPtr = EndPtr; + + // There is one line for each counter; skip over those lines. + for (unsigned N = 0; N < NumCounters; ++N) { + CurPtr = strchr(++CurPtr, '\n'); + if (!CurPtr) { + ReportBadPGOData(CGM, "pgo data file is missing some counter info"); + return; + } + } + + // Skip over the blank line separating functions. + CurPtr += 2; + + DataOffsets[MangledName] = FuncName - BufferStart; + } +} + +bool PGOProfileData::getFunctionCounts(StringRef MangledName, + std::vector &Counts) { + // Find the relevant section of the pgo-data file. + llvm::StringMap::const_iterator OffsetIter = + DataOffsets.find(MangledName); + if (OffsetIter == DataOffsets.end()) + return true; + const char *CurPtr = DataBuffer->getBufferStart() + OffsetIter->getValue(); + + // Skip over the function name. + CurPtr = strchr(CurPtr, ' '); + assert(CurPtr && "pgo-data has corrupted function entry"); + + // Read the number of counters. + char *EndPtr; + unsigned NumCounters = strtol(++CurPtr, &EndPtr, 10); + assert(EndPtr != CurPtr && *EndPtr == '\n' && NumCounters > 0 && + "pgo-data file has corrupted number of counters"); + CurPtr = EndPtr; + + Counts.reserve(NumCounters); + + for (unsigned N = 0; N < NumCounters; ++N) { + // Read the count value. + uint64_t Count = strtoll(CurPtr, &EndPtr, 10); + if (EndPtr == CurPtr || *EndPtr != '\n') { + ReportBadPGOData(CGM, "pgo-data file has bad count value"); + return true; + } + Counts.push_back(Count); + CurPtr = EndPtr + 1; + } + + // Make sure the number of counters matches up. + if (Counts.size() != NumCounters) { + ReportBadPGOData(CGM, "pgo-data file has inconsistent counters"); + return true; + } + + return false; +} + +void CodeGenPGO::emitWriteoutFunction(GlobalDecl &GD) { + if (!CGM.getCodeGenOpts().ProfileInstrGenerate) + return; + + llvm::LLVMContext &Ctx = CGM.getLLVMContext(); + + llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx); + llvm::Type *Int8PtrTy = llvm::Type::getInt8PtrTy(Ctx); + + llvm::Function *WriteoutF = + CGM.getModule().getFunction("__llvm_pgo_writeout"); + if (!WriteoutF) { + llvm::FunctionType *WriteoutFTy = + llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), false); + WriteoutF = llvm::Function::Create(WriteoutFTy, + llvm::GlobalValue::InternalLinkage, + "__llvm_pgo_writeout", &CGM.getModule()); + } + WriteoutF->setUnnamedAddr(true); + WriteoutF->addFnAttr(llvm::Attribute::NoInline); + if (CGM.getCodeGenOpts().DisableRedZone) + WriteoutF->addFnAttr(llvm::Attribute::NoRedZone); + + llvm::BasicBlock *BB = WriteoutF->empty() ? + llvm::BasicBlock::Create(Ctx, "", WriteoutF) : &WriteoutF->getEntryBlock(); + + CGBuilderTy PGOBuilder(BB); + + llvm::Instruction *I = BB->getTerminator(); + if (!I) + I = PGOBuilder.CreateRetVoid(); + PGOBuilder.SetInsertPoint(I); + + llvm::Type *Int64PtrTy = llvm::Type::getInt64PtrTy(Ctx); + llvm::Type *Args[] = { + Int8PtrTy, // const char *MangledName + Int32Ty, // uint32_t NumCounters + Int64PtrTy // uint64_t *Counters + }; + llvm::FunctionType *FTy = + llvm::FunctionType::get(PGOBuilder.getVoidTy(), Args, false); + llvm::Constant *EmitFunc = + CGM.getModule().getOrInsertFunction("llvm_pgo_emit", FTy); + + llvm::Constant *MangledName = + CGM.GetAddrOfConstantCString(CGM.getMangledName(GD), "__llvm_pgo_name"); + MangledName = llvm::ConstantExpr::getBitCast(MangledName, Int8PtrTy); + PGOBuilder.CreateCall3(EmitFunc, MangledName, + PGOBuilder.getInt32(NumRegionCounters), + PGOBuilder.CreateBitCast(RegionCounters, Int64PtrTy)); +} + +llvm::Function *CodeGenPGO::emitInitialization(CodeGenModule &CGM) { + llvm::Function *WriteoutF = + CGM.getModule().getFunction("__llvm_pgo_writeout"); + if (!WriteoutF) + return NULL; + + // Create a small bit of code that registers the "__llvm_pgo_writeout" to + // be executed at exit. + llvm::Function *F = CGM.getModule().getFunction("__llvm_pgo_init"); + if (F) + return NULL; + + llvm::LLVMContext &Ctx = CGM.getLLVMContext(); + llvm::FunctionType *FTy = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), + false); + F = llvm::Function::Create(FTy, llvm::GlobalValue::InternalLinkage, + "__llvm_pgo_init", &CGM.getModule()); + F->setUnnamedAddr(true); + F->setLinkage(llvm::GlobalValue::InternalLinkage); + F->addFnAttr(llvm::Attribute::NoInline); + if (CGM.getCodeGenOpts().DisableRedZone) + F->addFnAttr(llvm::Attribute::NoRedZone); + + llvm::BasicBlock *BB = llvm::BasicBlock::Create(CGM.getLLVMContext(), "", F); + CGBuilderTy PGOBuilder(BB); + + FTy = llvm::FunctionType::get(PGOBuilder.getVoidTy(), false); + llvm::Type *Params[] = { + llvm::PointerType::get(FTy, 0) + }; + FTy = llvm::FunctionType::get(PGOBuilder.getVoidTy(), Params, false); + + // Inialize the environment and register the local writeout function. + llvm::Constant *PGOInit = + CGM.getModule().getOrInsertFunction("llvm_pgo_init", FTy); + PGOBuilder.CreateCall(PGOInit, WriteoutF); + PGOBuilder.CreateRetVoid(); + + return F; +} + +namespace { + /// A StmtVisitor that fills a map of statements to PGO counters. + struct MapRegionCounters : public ConstStmtVisitor { + /// The next counter value to assign. + unsigned NextCounter; + /// The map of statements to counters. + llvm::DenseMap *CounterMap; + + MapRegionCounters(llvm::DenseMap *CounterMap) : + NextCounter(0), CounterMap(CounterMap) { + } + + void VisitChildren(const Stmt *S) { + for (Stmt::const_child_range I = S->children(); I; ++I) + if (*I) + this->Visit(*I); + } + void VisitStmt(const Stmt *S) { VisitChildren(S); } + + /// Assign a counter to track entry to the function body + void VisitFunctionDecl(const FunctionDecl *S) { + (*CounterMap)[S->getBody()] = NextCounter++; + Visit(S->getBody()); + } + /// Assign a counter to track the block following a label + void VisitLabelStmt(const LabelStmt *S) { + (*CounterMap)[S] = NextCounter++; + Visit(S->getSubStmt()); + } + /// Assign three counters - one for the body of the loop, one for breaks + /// from the loop, and one for continues. + /// + /// The break and continue counters cover all such statements in this loop, + /// and are used in calculations to find the number of times the condition + /// and exit of the loop occur. They are needed so we can differentiate + /// these statements from non-local exits like return and goto. + void VisitWhileStmt(const WhileStmt *S) { + (*CounterMap)[S] = NextCounter; + NextCounter += 3; + Visit(S->getCond()); + Visit(S->getBody()); + } + /// Assign counters for the body of the loop, and for breaks and + /// continues. See VisitWhileStmt. + void VisitDoStmt(const DoStmt *S) { + (*CounterMap)[S] = NextCounter; + NextCounter += 3; + Visit(S->getBody()); + Visit(S->getCond()); + } + /// Assign counters for the body of the loop, and for breaks and + /// continues. See VisitWhileStmt. + void VisitForStmt(const ForStmt *S) { + (*CounterMap)[S] = NextCounter; + NextCounter += 3; + const Expr *E; + if ((E = S->getCond())) + Visit(E); + Visit(S->getBody()); + if ((E = S->getInc())) + Visit(E); + } + /// Assign counters for the body of the loop, and for breaks and + /// continues. See VisitWhileStmt. + void VisitCXXForRangeStmt(const CXXForRangeStmt *S) { + (*CounterMap)[S] = NextCounter; + NextCounter += 3; + const Expr *E; + if ((E = S->getCond())) + Visit(E); + Visit(S->getBody()); + if ((E = S->getInc())) + Visit(E); + } + /// Assign counters for the body of the loop, and for breaks and + /// continues. See VisitWhileStmt. + void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) { + (*CounterMap)[S] = NextCounter; + NextCounter += 3; + Visit(S->getElement()); + Visit(S->getBody()); + } + /// Assign a counter for the exit block of the switch statement. + void VisitSwitchStmt(const SwitchStmt *S) { + (*CounterMap)[S] = NextCounter++; + Visit(S->getCond()); + Visit(S->getBody()); + } + /// Assign a counter for a particular case in a switch. This counts jumps + /// from the switch header as well as fallthrough from the case before this + /// one. + void VisitCaseStmt(const CaseStmt *S) { + (*CounterMap)[S] = NextCounter++; + Visit(S->getSubStmt()); + } + /// Assign a counter for the default case of a switch statement. The count + /// is the number of branches from the loop header to the default, and does + /// not include fallthrough from previous cases. If we have multiple + /// conditional branch blocks from the switch instruction to the default + /// block, as with large GNU case ranges, this is the counter for the last + /// edge in that series, rather than the first. + void VisitDefaultStmt(const DefaultStmt *S) { + (*CounterMap)[S] = NextCounter++; + Visit(S->getSubStmt()); + } + /// Assign a counter for the "then" part of an if statement. The count for + /// the "else" part, if it exists, will be calculated from this counter. + void VisitIfStmt(const IfStmt *S) { + (*CounterMap)[S] = NextCounter++; + Visit(S->getCond()); + Visit(S->getThen()); + if (S->getElse()) + Visit(S->getElse()); + } + /// Assign a counter for the continuation block of a C++ try statement. + void VisitCXXTryStmt(const CXXTryStmt *S) { + (*CounterMap)[S] = NextCounter++; + Visit(S->getTryBlock()); + for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I) + Visit(S->getHandler(I)); + } + /// Assign a counter for a catch statement's handler block. + void VisitCXXCatchStmt(const CXXCatchStmt *S) { + (*CounterMap)[S] = NextCounter++; + Visit(S->getHandlerBlock()); + } + /// Assign a counter for the "true" part of a conditional operator. The + /// count in the "false" part will be calculated from this counter. + void VisitConditionalOperator(const ConditionalOperator *E) { + (*CounterMap)[E] = NextCounter++; + Visit(E->getCond()); + Visit(E->getTrueExpr()); + Visit(E->getFalseExpr()); + } + /// Assign a counter for the right hand side of a logical and operator. + void VisitBinLAnd(const BinaryOperator *E) { + (*CounterMap)[E] = NextCounter++; + Visit(E->getLHS()); + Visit(E->getRHS()); + } + /// Assign a counter for the right hand side of a logical or operator. + void VisitBinLOr(const BinaryOperator *E) { + (*CounterMap)[E] = NextCounter++; + Visit(E->getLHS()); + Visit(E->getRHS()); + } + }; +} + +void CodeGenPGO::assignRegionCounters(GlobalDecl &GD) { + bool InstrumentRegions = CGM.getCodeGenOpts().ProfileInstrGenerate; + PGOProfileData *PGOData = CGM.getPGOData(); + if (!InstrumentRegions && !PGOData) + return; + const Decl *D = GD.getDecl(); + if (!D) + return; + mapRegionCounters(D); + if (InstrumentRegions) + emitCounterVariables(); + if (PGOData) + loadRegionCounts(GD, PGOData); +} + +void CodeGenPGO::mapRegionCounters(const Decl *D) { + RegionCounterMap = new llvm::DenseMap(); + MapRegionCounters Walker(RegionCounterMap); + if (const FunctionDecl *FD = dyn_cast_or_null(D)) + Walker.VisitFunctionDecl(FD); + NumRegionCounters = Walker.NextCounter; +} + +void CodeGenPGO::emitCounterVariables() { + llvm::LLVMContext &Ctx = CGM.getLLVMContext(); + llvm::ArrayType *CounterTy = llvm::ArrayType::get(llvm::Type::getInt64Ty(Ctx), + NumRegionCounters); + RegionCounters = + new llvm::GlobalVariable(CGM.getModule(), CounterTy, false, + llvm::GlobalVariable::PrivateLinkage, + llvm::Constant::getNullValue(CounterTy), + "__llvm_pgo_ctr"); +} + +void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, unsigned Counter) { + if (!CGM.getCodeGenOpts().ProfileInstrGenerate) + return; + llvm::Value *Addr = + Builder.CreateConstInBoundsGEP2_64(RegionCounters, 0, Counter); + llvm::Value *Count = Builder.CreateLoad(Addr, "pgocount"); + Count = Builder.CreateAdd(Count, Builder.getInt64(1)); + Builder.CreateStore(Count, Addr); +} + +void CodeGenPGO::loadRegionCounts(GlobalDecl &GD, PGOProfileData *PGOData) { + // For now, ignore the counts from the PGO data file only if the number of + // counters does not match. This could be tightened down in the future to + // ignore counts when the input changes in various ways, e.g., by comparing a + // hash value based on some characteristics of the input. + RegionCounts = new std::vector(); + if (PGOData->getFunctionCounts(CGM.getMangledName(GD), *RegionCounts) || + RegionCounts->size() != NumRegionCounters) { + delete RegionCounts; + RegionCounts = 0; + } +} + +void CodeGenPGO::destroyRegionCounters() { + if (RegionCounterMap != 0) + delete RegionCounterMap; + if (RegionCounts != 0) + delete RegionCounts; +} + +llvm::MDNode *CodeGenPGO::createBranchWeights(uint64_t TrueCount, + uint64_t FalseCount) { + if (!TrueCount && !FalseCount) + return 0; + + llvm::MDBuilder MDHelper(CGM.getLLVMContext()); + // TODO: need to scale down to 32-bits + // According to Laplace's Rule of Succession, it is better to compute the + // weight based on the count plus 1. + return MDHelper.createBranchWeights(TrueCount + 1, FalseCount + 1); +} + +llvm::MDNode * +CodeGenPGO::createBranchWeights(ArrayRef Weights) { + llvm::MDBuilder MDHelper(CGM.getLLVMContext()); + // TODO: need to scale down to 32-bits, instead of just truncating. + // According to Laplace's Rule of Succession, it is better to compute the + // weight based on the count plus 1. + SmallVector ScaledWeights; + ScaledWeights.reserve(Weights.size()); + for (ArrayRef::iterator WI = Weights.begin(), WE = Weights.end(); + WI != WE; ++WI) { + ScaledWeights.push_back(*WI + 1); + } + return MDHelper.createBranchWeights(ScaledWeights); +} diff --git a/lib/CodeGen/CodeGenPGO.h b/lib/CodeGen/CodeGenPGO.h new file mode 100644 index 0000000000..43dee8689d --- /dev/null +++ b/lib/CodeGen/CodeGenPGO.h @@ -0,0 +1,216 @@ +//===--- CodeGenPGO.h - PGO Instrumentation for LLVM CodeGen ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Instrumentation-based profile-guided optimization +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_CODEGEN_CODEGENPGO_H +#define CLANG_CODEGEN_CODEGENPGO_H + +#include "CGBuilder.h" +#include "CodeGenModule.h" +#include "CodeGenTypes.h" +#include "clang/Frontend/CodeGenOptions.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/MemoryBuffer.h" + +namespace clang { +namespace CodeGen { +class RegionCounter; + +/// The raw counter data from an instrumented PGO binary +class PGOProfileData { +private: + /// The PGO data + llvm::OwningPtr DataBuffer; + /// Offsets into DataBuffer for each function's counters + llvm::StringMap DataOffsets; + CodeGenModule &CGM; +public: + PGOProfileData(CodeGenModule &CGM, std::string Path); + /// Fill Counts with the profile data for the given function name. Returns + /// false on success. + bool getFunctionCounts(StringRef MangledName, std::vector &Counts); +}; + +/// Per-function PGO state. This class should generally not be used directly, +/// but instead through the CodeGenFunction and RegionCounter types. +class CodeGenPGO { +private: + CodeGenModule &CGM; + + unsigned NumRegionCounters; + llvm::GlobalVariable *RegionCounters; + llvm::DenseMap *RegionCounterMap; + std::vector *RegionCounts; + uint64_t CurrentRegionCount; + +public: + CodeGenPGO(CodeGenModule &CGM) + : CGM(CGM), NumRegionCounters(0), RegionCounters(0), RegionCounterMap(0), + RegionCounts(0), CurrentRegionCount(0) {} + ~CodeGenPGO() {} + + /// Whether or not we have PGO region data for the current function. This is + /// false both when we have no data at all and when our data has been + /// discarded. + bool haveRegionCounts() const { return RegionCounts != 0; } + + /// Return the counter value of the current region. + uint64_t getCurrentRegionCount() const { return CurrentRegionCount; } + /// Return the counter value of the current region, or \p Min if it is larger. + uint64_t getCurrentRegionCountWithMin(uint64_t Min) { + return std::max(Min, CurrentRegionCount); + } + /// Set the counter value for the current region. This is used to keep track + /// of changes to the most recent counter from control flow and non-local + /// exits. + void setCurrentRegionCount(uint64_t Count) { CurrentRegionCount = Count; } + + /// Calculate branch weights appropriate for PGO data + llvm::MDNode *createBranchWeights(uint64_t TrueCount, uint64_t FalseCount); + llvm::MDNode *createBranchWeights(ArrayRef Weights); + + /// Assign counters to regions and configure them for PGO of a given + /// function. Does nothing if instrumentation is not enabled and either + /// generates global variables or associates PGO data with each of the + /// counters depending on whether we are generating or using instrumentation. + void assignRegionCounters(GlobalDecl &GD); + /// Emit code to write counts for a given function to disk, if necessary. + void emitWriteoutFunction(GlobalDecl &GD); + /// Clean up region counter state. Must be called if assignRegionCounters is + /// used. + void destroyRegionCounters(); + /// Emit the logic to register region counter write out functions. Returns a + /// function that implements this logic. + static llvm::Function *emitInitialization(CodeGenModule &CGM); + +private: + void mapRegionCounters(const Decl *D); + void loadRegionCounts(GlobalDecl &GD, PGOProfileData *PGOData); + void emitCounterVariables(); + + /// Emit code to increment the counter at the given index + void emitCounterIncrement(CGBuilderTy &Builder, unsigned Counter); + + /// Return the region counter for the given statement. This should only be + /// called on statements that have a dedicated counter. + unsigned getRegionCounter(const Stmt *S) { + if (RegionCounterMap == 0) + return 0; + return (*RegionCounterMap)[S]; + } + + /// Return the region count for the counter at the given index. + uint64_t getRegionCount(unsigned Counter) { + if (!haveRegionCounts()) + return 0; + return (*RegionCounts)[Counter]; + } + + friend class RegionCounter; +}; + +/// A counter for a particular region. This is the primary interface through +/// which clients manage PGO counters and their values. +class RegionCounter { + CodeGenPGO *PGO; + unsigned Counter; + uint64_t Count; + uint64_t ParentCount; + uint64_t RegionCount; + int64_t Adjust; + + RegionCounter(CodeGenPGO &PGO, unsigned CounterIndex) + : PGO(&PGO), Counter(CounterIndex), Count(PGO.getRegionCount(Counter)), + ParentCount(PGO.getCurrentRegionCount()), Adjust(0) {} + +public: + RegionCounter(CodeGenPGO &PGO, const Stmt *S) + : PGO(&PGO), Counter(PGO.getRegionCounter(S)), + Count(PGO.getRegionCount(Counter)), + ParentCount(PGO.getCurrentRegionCount()), Adjust(0) {} + + /// Get the value of the counter. In most cases this is the number of times + /// the region of the counter was entered, but for switch labels it's the + /// number of direct jumps to that label. + uint64_t getCount() const { return Count; } + /// Get the value of the counter with adjustments applied. Adjustments occur + /// when control enters or leaves the region abnormally, ie, if there is a + /// jump to a label within the region, or if the function can return from + /// within the region. The adjusted count, then, is the value of the counter + /// at the end of the region. + uint64_t getAdjustedCount() const { + assert(Adjust > 0 || (uint64_t)(-Adjust) <= Count && "Negative count"); + return Count + Adjust; + } + /// Get the value of the counter in this region's parent, ie, the region that + /// was active when this region began. This is useful for deriving counts in + /// implicitly counted regions, like the false case of a condition or the + /// normal exits of a loop. + uint64_t getParentCount() const { return ParentCount; } + + /// Get the number of times the condition of a loop will evaluate false. This + /// is the number of times we enter the loop, adjusted by the difference + /// between entering and exiting the loop body normally, excepting that + /// 'continue' statements also bring us back here. + /// + /// Undefined if this counter is not counting a loop. + uint64_t getLoopExitCount() const { + return getParentCount() + getContinueCounter().getCount() + + getAdjustedCount() - getCount(); + } + /// Get the associated break counter. Undefined if this counter is not + /// counting a loop. + RegionCounter getBreakCounter() const { + return RegionCounter(*PGO, Counter + 1); + } + /// Get the associated continue counter. Undefined if this counter is not + /// counting a loop. + RegionCounter getContinueCounter() const { + return RegionCounter(*PGO, Counter + 2); + } + + /// Activate the counter by emitting an increment and starting to track + /// adjustments. If AddIncomingFallThrough is true, the current region count + /// will be added to the counter for the purposes of tracking the region. + void beginRegion(CGBuilderTy &Builder, bool AddIncomingFallThrough=false) { + RegionCount = Count; + if (AddIncomingFallThrough) + RegionCount += PGO->getCurrentRegionCount(); + PGO->setCurrentRegionCount(RegionCount); + PGO->emitCounterIncrement(Builder, Counter); + } + /// For counters on boolean branches, begins tracking adjustments for the + /// uncounted path. + void beginElseRegion() { + RegionCount = ParentCount - Count; + PGO->setCurrentRegionCount(RegionCount); + } + + /// Control may either enter or leave the region, so the count at the end may + /// be different from the start. Call this to track that adjustment without + /// modifying the current count. Must not be called before one of beginRegion + /// or beginElseRegion. + void adjustFallThroughCount() { + Adjust += PGO->getCurrentRegionCount() - RegionCount; + } + /// Commit all adjustments to the current region. This should be called after + /// all blocks that adjust the fallthrough count have been emitted. + void applyAdjustmentsToRegion() { + PGO->setCurrentRegionCount(ParentCount + Adjust); + } +}; + +} // end namespace CodeGen +} // end namespace clang + +#endif diff --git a/test/CodeGen/Inputs/instr-profile.pgodata b/test/CodeGen/Inputs/instr-profile.pgodata new file mode 100644 index 0000000000..ee4363aea7 --- /dev/null +++ b/test/CodeGen/Inputs/instr-profile.pgodata @@ -0,0 +1,127 @@ +simple_loops 10 +1 +100 +0 +0 +100 +0 +0 +76 +0 +0 + +conditionals 13 +1 +100 +0 +0 +50 +50 +33 +33 +16 +99 +100 +99 +100 + +early_exits 13 +1 +0 +51 +1 +25 +1 +25 +1 +26 +0 +0 +1 +0 + +jumps 30 +1 +1 +0 +0 +0 +1 +0 +0 +0 +0 +1 +0 +1 +2 +3 +2 +0 +0 +0 +3 +0 +1 +1 +1 +10 +0 +0 +0 +10 +9 + +switches 21 +1 +1 +1 +15 +0 +7 +7 +1 +0 +3 +2 +3 +3 +4 +4 +0 +4 +4 +5 +1 +0 + +big_switch 19 +1 +32 +0 +0 +32 +1 +0 +2 +1 +11 +11 +1 +1 +15 +15 +1 +1 +2 +2 + +no_usable_data 5 +1 +1 +1 +1 +1 + +main 1 +1 diff --git a/test/CodeGen/instr-profile.c b/test/CodeGen/instr-profile.c new file mode 100644 index 0000000000..9f5cffd44b --- /dev/null +++ b/test/CodeGen/instr-profile.c @@ -0,0 +1,428 @@ +// Test that instrumentation based profiling feeds branch prediction +// correctly. This tests both generation of profile data and use of the same, +// and the input file for the -fprofile-instr-use case is expected to be result +// of running the program generated by the -fprofile-instr-generate case +// (excepting no_usable_data). As such, main() should call every function in +// this test. + +// RUN: %clang %s -o - -emit-llvm -S -fprofile-instr-generate | FileCheck -check-prefix=PGOGEN %s +// RUN: %clang %s -o - -emit-llvm -S -fprofile-instr-use=%S/Inputs/instr-profile.pgodata | FileCheck -check-prefix=PGOUSE %s + +// PGOGEN: @[[SLC:__llvm_pgo_ctr[0-9]*]] = private global [10 x i64] zeroinitializer +// PGOGEN: @[[IFC:__llvm_pgo_ctr[0-9]*]] = private global [13 x i64] zeroinitializer +// PGOGEN: @[[EEC:__llvm_pgo_ctr[0-9]*]] = private global [13 x i64] zeroinitializer +// PGOGEN: @[[JMC:__llvm_pgo_ctr[0-9]*]] = private global [30 x i64] zeroinitializer +// PGOGEN: @[[SWC:__llvm_pgo_ctr[0-9]*]] = private global [21 x i64] zeroinitializer +// PGOGEN: @[[BSC:__llvm_pgo_ctr[0-9]*]] = private global [19 x i64] zeroinitializer +// PGOGEN: @[[NOC:__llvm_pgo_ctr[0-9]*]] = private global [2 x i64] zeroinitializer +// PGOGEN: @[[MAC:__llvm_pgo_ctr[0-9]*]] = private global [1 x i64] zeroinitializer + +// PGOGEN-LABEL: @simple_loops() +// PGOUSE-LABEL: @simple_loops() +// PGOGEN: store {{.*}} @[[SLC]], i64 0, i64 0 +void simple_loops() { + int i; + // PGOGEN: store {{.*}} @[[SLC]], i64 0, i64 1 + // PGOUSE: br {{.*}} !prof ![[SL1:[0-9]+]] + for (i = 0; i < 100; ++i) { + } + // PGOGEN: store {{.*}} @[[SLC]], i64 0, i64 4 + // PGOUSE: br {{.*}} !prof ![[SL2:[0-9]+]] + while (i > 0) + i--; + // PGOGEN: store {{.*}} @[[SLC]], i64 0, i64 7 + // PGOUSE: br {{.*}} !prof ![[SL3:[0-9]+]] + do {} while (i++ < 75); + + // PGOGEN-NOT: store {{.*}} @[[SLC]], + // PGOUSE-NOT: br {{.*}} !prof ![0-9]+ +} + +// PGOGEN-LABEL: @conditionals() +// PGOUSE-LABEL: @conditionals() +// PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 0 +void conditionals() { + // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 1 + // PGOUSE: br {{.*}} !prof ![[IF1:[0-9]+]] + for (int i = 0; i < 100; ++i) { + // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 4 + // PGOUSE: br {{.*}} !prof ![[IF2:[0-9]+]] + if (i % 2) { + // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 5 + // PGOUSE: br {{.*}} !prof ![[IF3:[0-9]+]] + if (i) {} + // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 6 + // PGOUSE: br {{.*}} !prof ![[IF4:[0-9]+]] + } else if (i % 3) { + // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 7 + // PGOUSE: br {{.*}} !prof ![[IF5:[0-9]+]] + if (i) {} + } else { + // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 8 + // PGOUSE: br {{.*}} !prof ![[IF6:[0-9]+]] + if (i) {} + } + + // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 10 + // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 9 + // PGOUSE: br {{.*}} !prof ![[IF7:[0-9]+]] + if (1 && i) {} + // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 12 + // PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 11 + // PGOUSE: br {{.*}} !prof ![[IF8:[0-9]+]] + if (0 || i) {} + } + + // PGOGEN-NOT: store {{.*}} @[EEC]], + // PGOUSE-NOT: br {{.*}} !prof ![0-9]+ +} + +// PGOGEN-LABEL: @early_exits() +// PGOUSE-LABEL: @early_exits() +// PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 0 +void early_exits() { + int i = 0; + + // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 1 + // PGOUSE: br {{.*}} !prof ![[EE1:[0-9]+]] + if (i) {} + + // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 2 + // PGOUSE: br {{.*}} !prof ![[EE2:[0-9]+]] + while (i < 100) { + i++; + // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 5 + // PGOUSE: br {{.*}} !prof ![[EE3:[0-9]+]] + if (i > 50) + // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 3 + break; + // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 6 + // PGOUSE: br {{.*}} !prof ![[EE4:[0-9]+]] + if (i % 2) + // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 4 + continue; + } + + // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 7 + // PGOUSE: br {{.*}} !prof ![[EE5:[0-9]+]] + if (i) {} + + // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 8 + do { + // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 11 + // PGOUSE: br {{.*}} !prof ![[EE6:[0-9]+]] + if (i > 75) + return; + else + i++; + // PGOUSE: br {{.*}} !prof ![[EE7:[0-9]+]] + } while (i < 100); + + // PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 12 + // Never reached -> no weights + if (i) {} + + // PGOGEN-NOT: store {{.*}} @[[EEC]], + // PGOUSE-NOT: br {{.*}} !prof ![0-9]+ +} + +// PGOGEN-LABEL: @jumps() +// PGOUSE-LABEL: @jumps() +// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 0 +void jumps() { + int i; + + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 1 + // PGOUSE: br {{.*}} !prof ![[JM1:[0-9]+]] + for (i = 0; i < 2; ++i) { + goto outofloop; + // Never reached -> no weights + if (i) {} + } +// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 5 +outofloop: + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 6 + // PGOUSE: br {{.*}} !prof ![[JM2:[0-9]+]] + if (i) {} + + goto loop1; + + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 7 + // PGOUSE: br {{.*}} !prof ![[JM3:[0-9]+]] + while (i) { + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 10 + loop1: + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 11 + // PGOUSE: br {{.*}} !prof ![[JM4:[0-9]+]] + if (i) {} + } + + goto loop2; +// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 12 +first: +// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 13 +second: +// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 14 +third: + i++; + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 15 + // PGOUSE: br {{.*}} !prof ![[JM5:[0-9]+]] + if (i < 3) + goto loop2; + + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 16 + // PGOUSE: br {{.*}} !prof ![[JM6:[0-9]+]] + while (i < 3) { + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 19 + loop2: + // PGOUSE: switch {{.*}} [ + // PGOUSE: ], !prof ![[JM7:[0-9]+]] + switch (i) { + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 21 + case 0: + goto first; + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 22 + case 1: + goto second; + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 23 + case 2: + goto third; + } + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 20 + } + + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 24 + // PGOUSE: br {{.*}} !prof ![[JM8:[0-9]+]] + for (i = 0; i < 10; ++i) { + goto withinloop; + // never reached -> no weights + if (i) {} + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 28 + withinloop: + // PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 29 + // PGOUSE: br {{.*}} !prof ![[JM9:[0-9]+]] + if (i) {} + } + + // PGOGEN-NOT: store {{.*}} @[[JMC]], + // PGOUSE-NOT: br {{.*}} !prof ![0-9]+ +} + +// PGOGEN-LABEL: @switches() +// PGOUSE-LABEL: @switches() +// PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 0 +void switches() { + static int weights[] = {1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5}; + + // No cases -> no weights + switch (weights[0]) { + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 2 + default: + break; + } + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 1 + + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 3 + // PGOUSE: br {{.*}} !prof ![[SW1:[0-9]+]] + for (int i = 0, len = sizeof(weights) / sizeof(weights[0]); i < len; ++i) { + // PGOUSE: switch {{.*}} [ + // PGOUSE: ], !prof ![[SW2:[0-9]+]] + switch (i[weights]) { + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 7 + case 1: + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 8 + // PGOUSE: br {{.*}} !prof ![[SW3:[0-9]+]] + if (i) {} + // fallthrough + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 9 + case 2: + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 10 + // PGOUSE: br {{.*}} !prof ![[SW4:[0-9]+]] + if (i) {} + break; + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 11 + case 3: + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 12 + // PGOUSE: br {{.*}} !prof ![[SW5:[0-9]+]] + if (i) {} + continue; + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 13 + case 4: + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 14 + // PGOUSE: br {{.*}} !prof ![[SW6:[0-9]+]] + if (i) {} + // PGOUSE: switch {{.*}} [ + // PGOUSE: ], !prof ![[SW7:[0-9]+]] + switch (i) { + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 16 + case 6 ... 9: + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 17 + // PGOUSE: br {{.*}} !prof ![[SW8:[0-9]+]] + if (i) {} + continue; + } + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 15 + + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 18 + default: + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 19 + // PGOUSE: br {{.*}} !prof ![[SW9:[0-9]+]] + if (i == len - 1) + return; + } + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 6 + } + + // PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 20 + // Never reached -> no weights + if (weights[0]) {} + + // PGOGEN-NOT: store {{.*}} @[[SWC]], + // PGOUSE-NOT: br {{.*}} !prof ![0-9]+ +} + +// PGOGEN-LABEL: @big_switch() +// PGOUSE-LABEL: @big_switch() +// PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 0 +void big_switch() { + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 1 + // PGOUSE: br {{.*}} !prof ![[BS1:[0-9]+]] + for (int i = 0; i < 32; ++i) { + // PGOUSE: switch {{.*}} [ + // PGOUSE: ], !prof ![[BS2:[0-9]+]] + switch (1 << i) { + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 5 + case (1 << 0): + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 6 + // PGOUSE: br {{.*}} !prof ![[BS3:[0-9]+]] + if (i) {} + // fallthrough + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 7 + case (1 << 1): + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 8 + // PGOUSE: br {{.*}} !prof ![[BS4:[0-9]+]] + if (i) {} + break; + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 9 + case (1 << 2) ... (1 << 12): + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 10 + // PGOUSE: br {{.*}} !prof ![[BS5:[0-9]+]] + if (i) {} + break; + // The branch for the large case range above appears after the case body + // PGOUSE: br {{.*}} !prof ![[BS6:[0-9]+]] + + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 11 + case (1 << 13): + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 12 + // PGOUSE: br {{.*}} !prof ![[BS7:[0-9]+]] + if (i) {} + break; + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 13 + case (1 << 14) ... (1 << 28): + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 14 + // PGOUSE: br {{.*}} !prof ![[BS8:[0-9]+]] + if (i) {} + break; + // The branch for the large case range above appears after the case body + // PGOUSE: br {{.*}} !prof ![[BS9:[0-9]+]] + + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 15 + case (1 << 29) ... ((1 << 29) + 1): + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 16 + // PGOUSE: br {{.*}} !prof ![[BS10:[0-9]+]] + if (i) {} + break; + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 17 + default: + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 18 + // PGOUSE: br {{.*}} !prof ![[BS11:[0-9]+]] + if (i) {} + break; + } + // PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 4 + } + + // PGOGEN-NOT: store {{.*}} @[[BSC]], + // PGOUSE-NOT: br {{.*}} !prof ![0-9]+ + // PGOUSE: ret void +} + +// PGOGEN-LABEL: @no_usable_data() +// PGOUSE-LABEL: @no_usable_data() +// PGOGEN: store {{.*}} @[[NOC]], i64 0, i64 0 +void no_usable_data() { + // The input data for PGOUSE is deliberately invalid for this function, so + // that we can test that we reject and ignore it properly. + int i = 0; + + // PGOGEN: store {{.*}} @[[NOC]], i64 0, i64 1 + if (i) {} + + // PGOGEN-NOT: store {{.*}} @[[NOC]], + // PGOUSE-NOT: br {{.*}} !prof ![0-9]+ +} + +// PGOUSE-DAG: ![[SL1]] = metadata !{metadata !"branch_weights", i32 101, i32 2} +// PGOUSE-DAG: ![[SL2]] = metadata !{metadata !"branch_weights", i32 101, i32 2} +// PGOUSE-DAG: ![[SL3]] = metadata !{metadata !"branch_weights", i32 76, i32 2} + +// PGOUSE-DAG: ![[EE1]] = metadata !{metadata !"branch_weights", i32 1, i32 2} +// PGOUSE-DAG: ![[EE2]] = metadata !{metadata !"branch_weights", i32 52, i32 1} +// PGOUSE-DAG: ![[EE3]] = metadata !{metadata !"branch_weights", i32 2, i32 51} +// PGOUSE-DAG: ![[EE4]] = metadata !{metadata !"branch_weights", i32 26, i32 26} +// PGOUSE-DAG: ![[EE5]] = metadata !{metadata !"branch_weights", i32 2, i32 1} +// PGOUSE-DAG: ![[EE6]] = metadata !{metadata !"branch_weights", i32 2, i32 26} +// PGOUSE-DAG: ![[EE7]] = metadata !{metadata !"branch_weights", i32 26, i32 1} + +// PGOUSE-DAG: ![[IF1]] = metadata !{metadata !"branch_weights", i32 101, i32 2} +// PGOUSE-DAG: ![[IF2]] = metadata !{metadata !"branch_weights", i32 51, i32 51} +// PGOUSE-DAG: ![[IF3]] = metadata !{metadata !"branch_weights", i32 51, i32 1} +// PGOUSE-DAG: ![[IF4]] = metadata !{metadata !"branch_weights", i32 34, i32 18} +// PGOUSE-DAG: ![[IF5]] = metadata !{metadata !"branch_weights", i32 34, i32 1} +// PGOUSE-DAG: ![[IF6]] = metadata !{metadata !"branch_weights", i32 17, i32 2} +// PGOUSE-DAG: ![[IF7]] = metadata !{metadata !"branch_weights", i32 100, i32 2} +// PGOUSE-DAG: ![[IF8]] = metadata !{metadata !"branch_weights", i32 100, i32 2} + +// PGOUSE-DAG: ![[JM1]] = metadata !{metadata !"branch_weights", i32 2, i32 1} +// PGOUSE-DAG: ![[JM2]] = metadata !{metadata !"branch_weights", i32 1, i32 2} +// PGOUSE-DAG: ![[JM3]] = metadata !{metadata !"branch_weights", i32 1, i32 2} +// PGOUSE-DAG: ![[JM4]] = metadata !{metadata !"branch_weights", i32 1, i32 2} +// PGOUSE-DAG: ![[JM5]] = metadata !{metadata !"branch_weights", i32 3, i32 2} +// PGOUSE-DAG: ![[JM6]] = metadata !{metadata !"branch_weights", i32 1, i32 2} +// PGOUSE-DAG: ![[JM7]] = metadata !{metadata !"branch_weights", i32 1, i32 2, i32 2, i32 2} +// PGOUSE-DAG: ![[JM8]] = metadata !{metadata !"branch_weights", i32 11, i32 2} +// PGOUSE-DAG: ![[JM9]] = metadata !{metadata !"branch_weights", i32 10, i32 2} + +// PGOUSE-DAG: ![[SW1]] = metadata !{metadata !"branch_weights", i32 16, i32 1} +// PGOUSE-DAG: ![[SW2]] = metadata !{metadata !"branch_weights", i32 6, i32 2, i32 3, i32 4, i32 5} +// PGOUSE-DAG: ![[SW3]] = metadata !{metadata !"branch_weights", i32 1, i32 2} +// PGOUSE-DAG: ![[SW4]] = metadata !{metadata !"branch_weights", i32 3, i32 2} +// PGOUSE-DAG: ![[SW5]] = metadata !{metadata !"branch_weights", i32 4, i32 1} +// PGOUSE-DAG: ![[SW6]] = metadata !{metadata !"branch_weights", i32 5, i32 1} +// PGOUSE-DAG: ![[SW7]] = metadata !{metadata !"branch_weights", i32 1, i32 2, i32 2, i32 2, i32 2} +// PGOUSE-DAG: ![[SW8]] = metadata !{metadata !"branch_weights", i32 5, i32 1} +// PGOUSE-DAG: ![[SW9]] = metadata !{metadata !"branch_weights", i32 2, i32 5} + +// PGOUSE-DAG: ![[BS1]] = metadata !{metadata !"branch_weights", i32 33, i32 2} +// PGOUSE-DAG: ![[BS2]] = metadata !{metadata !"branch_weights", i32 29, i32 2, i32 2, i32 2, i32 2, i32 1} +// PGOUSE-DAG: ![[BS3]] = metadata !{metadata !"branch_weights", i32 1, i32 2} +// PGOUSE-DAG: ![[BS4]] = metadata !{metadata !"branch_weights", i32 2, i32 2} +// PGOUSE-DAG: ![[BS5]] = metadata !{metadata !"branch_weights", i32 12, i32 1} +// PGOUSE-DAG: ![[BS6]] = metadata !{metadata !"branch_weights", i32 12, i32 3} +// PGOUSE-DAG: ![[BS7]] = metadata !{metadata !"branch_weights", i32 2, i32 1} +// PGOUSE-DAG: ![[BS8]] = metadata !{metadata !"branch_weights", i32 16, i32 1} +// PGOUSE-DAG: ![[BS9]] = metadata !{metadata !"branch_weights", i32 16, i32 14} +// PGOUSE-DAG: ![[BS10]] = metadata !{metadata !"branch_weights", i32 2, i32 1} +// PGOUSE-DAG: ![[BS11]] = metadata !{metadata !"branch_weights", i32 3, i32 1} + +int main(int argc, const char *argv[]) { + simple_loops(); + conditionals(); + early_exits(); + jumps(); + switches(); + big_switch(); + no_usable_data(); + return 0; +} diff --git a/test/CodeGenCXX/Inputs/instr-profile.pgodata b/test/CodeGenCXX/Inputs/instr-profile.pgodata new file mode 100644 index 0000000000..1b64261f5c --- /dev/null +++ b/test/CodeGenCXX/Inputs/instr-profile.pgodata @@ -0,0 +1,16 @@ +_Z6throwsv 11 +1 +100 +0 +0 +100 +66 +33 +17 +50 +33 +100 + +main 1 +1 + diff --git a/test/CodeGenCXX/instr-profile.cpp b/test/CodeGenCXX/instr-profile.cpp new file mode 100644 index 0000000000..0f5ffa32ed --- /dev/null +++ b/test/CodeGenCXX/instr-profile.cpp @@ -0,0 +1,73 @@ +// Test that instrumentation based profiling feeds branch prediction +// correctly. This tests both generation of profile data and use of the same, +// and the input file for the -fprofile-instr-use case is expected to be result +// of running the program generated by the -fprofile-instr-generate case. As +// such, main() should call every function in this test. + +// RUN: %clangxx %s -o - -emit-llvm -S -fprofile-instr-generate | FileCheck -check-prefix=PGOGEN %s +// RUN: %clangxx %s -o - -emit-llvm -S -fprofile-instr-generate | FileCheck -check-prefix=PGOGEN-EXC %s + +// RUN: %clang %s -o - -emit-llvm -S -fprofile-instr-use=%S/Inputs/instr-profile.pgodata | FileCheck -check-prefix=PGOUSE %s +// RUN: %clang %s -o - -emit-llvm -S -fprofile-instr-use=%S/Inputs/instr-profile.pgodata | FileCheck -check-prefix=PGOUSE-EXC %s + +// PGOGEN: @[[THC:__llvm_pgo_ctr[0-9]*]] = private global [11 x i64] zeroinitializer +// PGOGEN-EXC: @[[THC:__llvm_pgo_ctr[0-9]*]] = private global [11 x i64] zeroinitializer + +// PGOGEN-LABEL: @_Z6throwsv() +// PGOUSE-LABEL: @_Z6throwsv() +// PGOGEN: store {{.*}} @[[THC]], i64 0, i64 0 +void throws() { + // PGOGEN: store {{.*}} @[[THC]], i64 0, i64 1 + // PGOUSE: br {{.*}} !prof ![[TH1:[0-9]+]] + for (int i = 0; i < 100; ++i) { + try { + // PGOGEN: store {{.*}} @[[THC]], i64 0, i64 5 + // PGOUSE: br {{.*}} !prof ![[TH2:[0-9]+]] + if (i % 3) { + // PGOGEN: store {{.*}} @[[THC]], i64 0, i64 6 + // PGOUSE: br {{.*}} !prof ![[TH3:[0-9]+]] + if (i < 50) + throw 1; + } else { + // The catch block may be emitted after the throw above, we can skip it + // by looking for an else block, but this will break if anyone puts an + // else in the catch + // PGOUSE: if.else{{.*}}: + // PGOGEN: if.else{{.*}}: + + // PGOGEN: store {{.*}} @[[THC]], i64 0, i64 7 + // PGOUSE: br {{.*}} !prof ![[TH4:[0-9]+]] + if (i >= 50) + throw 0; + } + } catch (int e) { + // PGOUSE-EXC: catch{{.*}}: + // PGOGEN-EXC: catch{{.*}}: + + // PGOGEN-EXC: store {{.*}} @[[THC]], i64 0, i64 8 + // PGOGEN-EXC: store {{.*}} @[[THC]], i64 0, i64 9 + // PGOUSE-EXC: br {{.*}} !prof ![[TH5:[0-9]+]] + if (e) {} + } + // PGOGEN: store {{.*}} @[[THC]], i64 0, i64 4 + + // PGOGEN: store {{.*}} @[[THC]], i64 0, i64 10 + // PGOUSE: br {{.*}} !prof ![[TH6:[0-9]+]] + if (i < 100) {} + } + + // PGOUSE-NOT: br {{.*}} !prof ![0-9]+ + // PGOUSE: ret void +} + +// PGOUSE-DAG: ![[TH1]] = metadata !{metadata !"branch_weights", i32 101, i32 2} +// PGOUSE-DAG: ![[TH2]] = metadata !{metadata !"branch_weights", i32 67, i32 35} +// PGOUSE-DAG: ![[TH3]] = metadata !{metadata !"branch_weights", i32 34, i32 34} +// PGOUSE-DAG: ![[TH4]] = metadata !{metadata !"branch_weights", i32 18, i32 18} +// PGOUSE-EXC: ![[TH5]] = metadata !{metadata !"branch_weights", i32 34, i32 18} +// PGOUSE-DAG: ![[TH6]] = metadata !{metadata !"branch_weights", i32 101, i32 1} + +int main(int argc, const char *argv[]) { + throws(); + return 0; +} -- 2.40.0