From: Duncan P. N. Exon Smith Date: Tue, 11 Mar 2014 18:18:10 +0000 (+0000) Subject: PGO: Scale large counters down to 32-bits X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5644f17e6ac6a9af616475cd80c4e1794b69226e;p=clang PGO: Scale large counters down to 32-bits PGO counters are 64-bit and branch weights are 32-bit. Scale them down when necessary, instead of just taking the lower 32 bits. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@203592 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CodeGenPGO.cpp b/lib/CodeGen/CodeGenPGO.cpp index cdd411cfa3..ed92de6e6d 100644 --- a/lib/CodeGen/CodeGenPGO.cpp +++ b/lib/CodeGen/CodeGenPGO.cpp @@ -872,29 +872,59 @@ void CodeGenPGO::destroyRegionCounters() { delete RegionCounts; } +/// \brief Calculate what to divide by to scale weights. +/// +/// Given the maximum weight, calculate a divisor that will scale all the +/// weights to strictly less than UINT32_MAX. +static uint64_t calculateWeightScale(uint64_t MaxWeight) { + return MaxWeight < UINT32_MAX ? 1 : MaxWeight / UINT32_MAX + 1; +} + +/// \brief Scale an individual branch weight (and add 1). +/// +/// Scale a 64-bit weight down to 32-bits using \c Scale. +/// +/// According to Laplace's Rule of Succession, it is better to compute the +/// weight based on the count plus 1, so universally add 1 to the value. +/// +/// \pre \c Scale was calculated by \a calculateWeightScale() with a weight no +/// less than \c Weight. +static uint32_t scaleBranchWeight(uint64_t Weight, uint64_t Scale) { + assert(Scale && "scale by 0?"); + uint64_t Scaled = Weight / Scale + 1; + assert(Scaled <= UINT32_MAX && "overflow 32-bits"); + return Scaled; +} + llvm::MDNode *CodeGenPGO::createBranchWeights(uint64_t TrueCount, uint64_t FalseCount) { + // Check for empty weights. if (!TrueCount && !FalseCount) return 0; + // Calculate how to scale down to 32-bits.
+ uint64_t Scale = calculateWeightScale(std::max(TrueCount, FalseCount)); + llvm::MDBuilder MDHelper(CGM.getLLVMContext()); - // TODO: need to scale down to 32-bits - // According to Laplace's Rule of Succession, it is better to compute the - // weight based on the count plus 1. - return MDHelper.createBranchWeights(TrueCount + 1, FalseCount + 1); + return MDHelper.createBranchWeights(scaleBranchWeight(TrueCount, Scale), + scaleBranchWeight(FalseCount, Scale)); } llvm::MDNode *CodeGenPGO::createBranchWeights(ArrayRef<uint64_t> Weights) { - llvm::MDBuilder MDHelper(CGM.getLLVMContext()); - // TODO: need to scale down to 32-bits, instead of just truncating. - // According to Laplace's Rule of Succession, it is better to compute the - // weight based on the count plus 1. + // We need at least two elements to create meaningful weights. + if (Weights.size() < 2) + return 0; + + // Calculate how to scale down to 32-bits. + uint64_t Scale = calculateWeightScale(*std::max_element(Weights.begin(), + Weights.end())); + SmallVector<uint32_t, 16> ScaledWeights; ScaledWeights.reserve(Weights.size()); - for (ArrayRef<uint64_t>::iterator WI = Weights.begin(), WE = Weights.end(); - WI != WE; ++WI) { - ScaledWeights.push_back(*WI + 1); - } + for (uint64_t W : Weights) + ScaledWeights.push_back(scaleBranchWeight(W, Scale)); + + llvm::MDBuilder MDHelper(CGM.getLLVMContext()); return MDHelper.createBranchWeights(ScaledWeights); } diff --git a/test/Profile/Inputs/c-counter-overflows.profdata b/test/Profile/Inputs/c-counter-overflows.profdata new file mode 100644 index 0000000000..377a08502d --- /dev/null +++ b/test/Profile/Inputs/c-counter-overflows.profdata @@ -0,0 +1,10 @@ +main 8 +1 +68719476720 +64424509425 +68719476720 +21474836475 +21474836475 +21474836475 +4294967295 + diff --git a/test/Profile/c-counter-overflows.c b/test/Profile/c-counter-overflows.c new file mode 100644 index 0000000000..ddbe6d1c97 --- /dev/null +++ b/test/Profile/c-counter-overflows.c @@ -0,0 +1,49 @@ +// Test that big branch weights get
scaled down to 32-bits, rather than just +// truncated. + +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name c-counter-overflows.c %s -o - -emit-llvm -fprofile-instr-use=%S/Inputs/c-counter-overflows.profdata | FileCheck %s + +#include <stdint.h> + +// PGOGEN: @[[MAIN:__llvm_pgo_ctr[0-9]*]] = private global [2 x i64] zeroinitializer +int main(int argc, const char *argv[]) { + // Need counts higher than 32-bits. + // CHECK: br {{.*}} !prof ![[FOR:[0-9]+]] + // max = 0xffffffff0 + // scale = 0xffffffff0 / 0xffffffff + 1 = 17 + // loop-body: 0xffffffff0 / 17 + 1 = 0xf0f0f0f0 + 1 = 4042322161 => -252645135 + // loop-exit: 0x000000001 / 17 + 1 = 0x00000000 + 1 = 1 => 1 + for (uint64_t I = 0; I < 0xffffffff0; ++I) { + // max = 0xffffffff * 15 = 0xefffffff1 + // scale = 0xefffffff1 / 0xffffffff + 1 = 16 + // CHECK: br {{.*}} !prof ![[IF:[0-9]+]] + if (I & 0xf) { + // 0xefffffff1 / 16 + 1 = 0xefffffff + 1 = 4026531840 => -268435456 + } else { + // 0x0ffffffff / 16 + 1 = 0x0fffffff + 1 = 268435456 => 268435456 + } + + // max = 0xffffffff * 5 = 0x4fffffffb + // scale = 0x4fffffffb / 0xffffffff + 1 = 6 + // CHECK: ], !prof ![[SWITCH:[0-9]+]] + switch ((I & 0xf) / 5) { + case 0: + // 0x4fffffffb / 6 + 1 = 0xd5555554 + 1 = 3579139413 => -715827883 + break; + case 1: + // 0x4fffffffb / 6 + 1 = 0xd5555554 + 1 = 3579139413 => -715827883 + break; + case 2: + // 0x4fffffffb / 6 + 1 = 0xd5555554 + 1 = 3579139413 => -715827883 + break; + default: + // 0x0ffffffff / 6 + 1 = 0x2aaaaaaa + 1 = 715827883 => 715827883 + break; + } + } + return 0; +} + +// CHECK-DAG: ![[FOR]] = metadata !{metadata !"branch_weights", i32 -252645135, i32 1} +// CHECK-DAG: ![[IF]] = metadata !{metadata !"branch_weights", i32 -268435456, i32 268435456} +// CHECK-DAG: ![[SWITCH]] = metadata !{metadata !"branch_weights", i32 715827883, i32 -715827883, i32 -715827883, i32 -715827883}