#include "clang/AST/StmtVisitor.h"
#include "llvm/Config/config.h" // for strtoull()/strtoul() define
#include "llvm/IR/MDBuilder.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MD5.h"
using namespace clang;
using namespace CodeGen;
}
namespace {
+/// \brief Stable hasher for PGO region counters.
+///
+/// PGOHash produces a stable hash of a given function's control flow.
+///
+/// Changing the output of this hash will invalidate all previously generated
+/// profiles -- i.e., don't do it.
+///
+/// \note When this hash does eventually change (years?), we still need to
+/// support old hashes. We'll need to pull in the version number from the
+/// profile data format and use the matching hash function.
+class PGOHash {
+ uint64_t Working;
+ unsigned Count;
+ llvm::MD5 MD5;
+
+ static const int NumBitsPerType = 6;
+ static const unsigned NumTypesPerWord = sizeof(uint64_t) * 8 / NumBitsPerType;
+ static const unsigned TooBig = 1u << NumBitsPerType;
+
+public:
+ /// \brief Hash values for AST nodes.
+ ///
+ /// Distinct values for AST nodes that have region counters attached.
+ ///
+ /// These values must be stable. All new members must be added at the end,
+ /// and no members should be removed. Changing the enumeration value for an
+ /// AST node will affect the hash of every function that contains that node.
+ enum HashType : unsigned char {
+ None = 0,
+ LabelStmt = 1,
+ WhileStmt,
+ DoStmt,
+ ForStmt,
+ CXXForRangeStmt,
+ ObjCForCollectionStmt,
+ SwitchStmt,
+ CaseStmt,
+ DefaultStmt,
+ IfStmt,
+ CXXTryStmt,
+ CXXCatchStmt,
+ ConditionalOperator,
+ BinaryOperatorLAnd,
+ BinaryOperatorLOr,
+ BinaryConditionalOperator,
+
+ // Keep this last. It's for the static assert that follows.
+ LastHashType
+ };
+ static_assert(LastHashType <= TooBig, "Too many types in HashType");
+
+ // TODO: When this format changes, take in a version number here, and use the
+ // old hash calculation for file formats that used the old hash.
+ PGOHash() : Working(0), Count(0) {}
+ void combine(HashType Type);
+ uint64_t finalize();
+};
+const int PGOHash::NumBitsPerType;
+const unsigned PGOHash::NumTypesPerWord;
+const unsigned PGOHash::TooBig;
+
/// A RecursiveASTVisitor that fills a map of statements to PGO counters.
struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
/// The next counter value to assign.
unsigned NextCounter;
+ /// The function hash.
+ PGOHash Hash;
/// The map of statements to counters.
llvm::DenseMap<const Stmt *, unsigned> &CounterMap;
}
bool VisitStmt(const Stmt *S) {
+ auto Type = getHashType(S);
+ if (Type == PGOHash::None)
+ return true;
+
+ CounterMap[S] = NextCounter++;
+ Hash.combine(Type);
+ return true;
+ }
+ PGOHash::HashType getHashType(const Stmt *S) {
switch (S->getStmtClass()) {
default:
break;
case Stmt::LabelStmtClass:
+ return PGOHash::LabelStmt;
case Stmt::WhileStmtClass:
+ return PGOHash::WhileStmt;
case Stmt::DoStmtClass:
+ return PGOHash::DoStmt;
case Stmt::ForStmtClass:
+ return PGOHash::ForStmt;
case Stmt::CXXForRangeStmtClass:
+ return PGOHash::CXXForRangeStmt;
case Stmt::ObjCForCollectionStmtClass:
+ return PGOHash::ObjCForCollectionStmt;
case Stmt::SwitchStmtClass:
+ return PGOHash::SwitchStmt;
case Stmt::CaseStmtClass:
+ return PGOHash::CaseStmt;
case Stmt::DefaultStmtClass:
+ return PGOHash::DefaultStmt;
case Stmt::IfStmtClass:
+ return PGOHash::IfStmt;
case Stmt::CXXTryStmtClass:
+ return PGOHash::CXXTryStmt;
case Stmt::CXXCatchStmtClass:
+ return PGOHash::CXXCatchStmt;
case Stmt::ConditionalOperatorClass:
+ return PGOHash::ConditionalOperator;
case Stmt::BinaryConditionalOperatorClass:
- CounterMap[S] = NextCounter++;
- break;
+ return PGOHash::BinaryConditionalOperator;
case Stmt::BinaryOperatorClass: {
const BinaryOperator *BO = cast<BinaryOperator>(S);
- if (BO->getOpcode() == BO_LAnd || BO->getOpcode() == BO_LOr)
- CounterMap[S] = NextCounter++;
+ if (BO->getOpcode() == BO_LAnd)
+ return PGOHash::BinaryOperatorLAnd;
+ if (BO->getOpcode() == BO_LOr)
+ return PGOHash::BinaryOperatorLOr;
break;
}
}
- return true;
+ return PGOHash::None;
}
};
};
}
+void PGOHash::combine(HashType Type) {
+ // Check that we never combine 0 and only have six bits.
+ assert(Type && "Hash is invalid: unexpected type 0");
+ assert(unsigned(Type) < TooBig && "Hash is invalid: too many types");
+
+ // Pass through MD5 if enough work has built up.
+ if (Count && Count % NumTypesPerWord == 0) {
+ using namespace llvm::support;
+ uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
+ MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
+ Working = 0;
+ }
+
+ // Accumulate the current type.
+ ++Count;
+ Working = Working << NumBitsPerType | Type;
+}
+
+uint64_t PGOHash::finalize() {
+ // Use Working as the hash directly if we never used MD5.
+ if (Count <= NumTypesPerWord)
+ // No need to byte swap here, since none of the math was endian-dependent.
+ // This number will be byte-swapped as required on endianness transitions,
+ // so we will see the same value on the other side.
+ return Working;
+
+ // Check for remaining work in Working.
+ if (Working)
+ MD5.update(Working);
+
+ // Finalize the MD5 and return the hash.
+ llvm::MD5::MD5Result Result;
+ MD5.final(Result);
+ using namespace llvm::support;
+ return endian::read<uint64_t, little, unaligned>(Result);
+}
+
static void emitRuntimeHook(CodeGenModule &CGM) {
const char *const RuntimeVarName = "__llvm_profile_runtime";
const char *const RuntimeUserName = "__llvm_profile_runtime_user";
else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
Walker.TraverseDecl(const_cast<CapturedDecl *>(CD));
NumRegionCounters = Walker.NextCounter;
- // FIXME: The number of counters isn't sufficient for the hash
- FunctionHash = NumRegionCounters;
+ FunctionHash = Walker.Hash.finalize();
}
void CodeGenPGO::computeRegionCounts(const Decl *D) {
124750
main
-6
+219169028
6
1
0
c-captured.c:__captured_stmt
-2
+10
2
1
1
c-captured.c:__captured_stmt1
-3
+266
3
1
10
1
main
-1
+0
1
1
debug_captured
-3
+650
3
1
1
main
-8
+285734896137
8
1
68719476720
simple_loops
-4
+16515
4
1
100
75
conditionals
-11
+74917022372782735
11
1
100
100
early_exits
-9
+44128811889290
9
1
0
0
jumps
-22
+2016037664281362839
22
1
1
9
switches
-19
+2745195701975551402
19
1
1
0
big_switch
-17
+10218718452081869619
17
1
32
2
boolean_operators
-8
+291222909838
8
1
100
50
boolop_loops
-9
+9760565944591
9
1
50
26
conditional_operator
-3
+848
3
1
0
1
do_fallthrough
-4
+16586
4
1
10
8
main
-1
+0
1
1
c-general.c:static_func
-2
+4
2
1
10
no_usable_data
-3
+650
3
1
0
0
main
-1
+0
1
1
never_called
-9
+44257542701577
9
0
0
1
dead_code
-10
+2859007309808137
10
1
0
_Z14simple_wrapperv
-2
+4
2
1
100
main
-1
+0
1
1
_ZN6SimpleD1Ev
-2
+10
2
0
0
_ZN6SimpleD2Ev
-2
+10
2
100
99
_ZN6Simple6methodEv
-2
+10
2
100
99
_ZN6SimpleC1Ei
-2
+10
2
0
0
_ZN6SimpleC2Ei
-2
+10
2
100
99
cxx-lambda.cpp:_ZZ7lambdasvENK3$_0clEi
-3
+654
3
10
9
9
main
-1
+0
1
1
_Z7lambdasv
-4
+41226
4
1
1
main
-1
+0
1
1
_Z4loopILj0EEvv
-2
+4
2
1
0
_Z4loopILj100EEvv
-2
+4
2
1
100
_Z6throwsv
-9
+18359008150154
9
1
100
100
main
-1
+0
1
1
objc-general.m:__13+[A foreach:]_block_invoke
-2
+10
2
2
1
objc-general.m:+[A foreach:]
-2
+6
2
1
2
main
-1
+0
1
1
// CHECK: @__llvm_profile_counters_foo = linkonce_odr global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
// CHECK: @__llvm_profile_name_foo = linkonce_odr constant [3 x i8] c"foo", section "__DATA,__llvm_prf_names", align 1
-// CHECK: @__llvm_profile_data_foo = linkonce_odr constant { i32, i32, i64, i8*, i64* } { i32 3, i32 1, i64 1, i8* getelementptr inbounds ([3 x i8]* @__llvm_profile_name_foo, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64]* @__llvm_profile_counters_foo, i32 0, i32 0) }, section "__DATA,__llvm_prf_data", align 8
+// CHECK: @__llvm_profile_data_foo = linkonce_odr constant { i32, i32, i64, i8*, i64* } { i32 3, i32 1, i64 {{[0-9]+}}, i8* getelementptr inbounds ([3 x i8]* @__llvm_profile_name_foo, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64]* @__llvm_profile_counters_foo, i32 0, i32 0) }, section "__DATA,__llvm_prf_data", align 8
inline int foo(void) { return 1; }
int main(void) {
// CHECK: @__llvm_profile_runtime = external global i32
// CHECK: @__llvm_profile_counters_foo = global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
// CHECK: @__llvm_profile_name_foo = constant [3 x i8] c"foo", section "__DATA,__llvm_prf_names", align 1
-// CHECK: @__llvm_profile_data_foo = constant { i32, i32, i64, i8*, i64* } { i32 3, i32 1, i64 1, i8* getelementptr inbounds ([3 x i8]* @__llvm_profile_name_foo, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64]* @__llvm_profile_counters_foo, i32 0, i32 0) }, section "__DATA,__llvm_prf_data", align 8
+// CHECK: @__llvm_profile_data_foo = constant { i32, i32, i64, i8*, i64* } { i32 3, i32 1, i64 {{[0-9]+}}, i8* getelementptr inbounds ([3 x i8]* @__llvm_profile_name_foo, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64]* @__llvm_profile_counters_foo, i32 0, i32 0) }, section "__DATA,__llvm_prf_data", align 8
void foo(void) { }
// CHECK: @__llvm_profile_counters_foo_weak = weak global [5 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
// CHECK: @__llvm_profile_name_foo_weak = weak constant [8 x i8] c"foo_weak", section "__DATA,__llvm_prf_names", align 1
-// CHECK: @__llvm_profile_data_foo_weak = weak constant { i32, i32, i64, i8*, i64* } { i32 8, i32 5, i64 5, i8* getelementptr inbounds ([8 x i8]* @__llvm_profile_name_foo_weak, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64]* @__llvm_profile_counters_foo_weak, i32 0, i32 0) }, section "__DATA,__llvm_prf_data", align 8
+// CHECK: @__llvm_profile_data_foo_weak = weak constant { i32, i32, i64, i8*, i64* } { i32 8, i32 5, i64 {{[0-9]+}}, i8* getelementptr inbounds ([8 x i8]* @__llvm_profile_name_foo_weak, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64]* @__llvm_profile_counters_foo_weak, i32 0, i32 0) }, section "__DATA,__llvm_prf_data", align 8
void foo_weak(void) __attribute__((weak));
void foo_weak(void) { if (0){} if (0){} if (0){} if (0){} }
// CHECK: @__llvm_profile_counters_main = global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
// CHECK: @__llvm_profile_name_main = constant [4 x i8] c"main", section "__DATA,__llvm_prf_names", align 1
-// CHECK: @__llvm_profile_data_main = constant { i32, i32, i64, i8*, i64* } { i32 4, i32 1, i64 1, i8* getelementptr inbounds ([4 x i8]* @__llvm_profile_name_main, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64]* @__llvm_profile_counters_main, i32 0, i32 0) }, section "__DATA,__llvm_prf_data", align 8
+// CHECK: @__llvm_profile_data_main = constant { i32, i32, i64, i8*, i64* } { i32 4, i32 1, i64 {{[0-9]+}}, i8* getelementptr inbounds ([4 x i8]* @__llvm_profile_name_main, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64]* @__llvm_profile_counters_main, i32 0, i32 0) }, section "__DATA,__llvm_prf_data", align 8
static void foo_internal(void);
int main(void) {
foo();
// CHECK: @__llvm_profile_counters_foo_internal = internal global [3 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
// CHECK: @__llvm_profile_name_foo_internal = internal constant [24 x i8] c"c-linkage.c:foo_internal", section "__DATA,__llvm_prf_names", align 1
-// CHECK: @__llvm_profile_data_foo_internal = internal constant { i32, i32, i64, i8*, i64* } { i32 24, i32 3, i64 3, i8* getelementptr inbounds ([24 x i8]* @__llvm_profile_name_foo_internal, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64]* @__llvm_profile_counters_foo_internal, i32 0, i32 0) }, section "__DATA,__llvm_prf_data", align 8
+// CHECK: @__llvm_profile_data_foo_internal = internal constant { i32, i32, i64, i8*, i64* } { i32 24, i32 3, i64 {{[0-9]+}}, i8* getelementptr inbounds ([24 x i8]* @__llvm_profile_name_foo_internal, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64]* @__llvm_profile_counters_foo_internal, i32 0, i32 0) }, section "__DATA,__llvm_prf_data", align 8
static void foo_internal(void) { if (0){} if (0){} }
// CHECK: @llvm.used = appending global [5 x i8*] [i8* bitcast (i32 ()* @__llvm_profile_runtime_user to i8*), i8* bitcast ({ i32, i32, i64, i8*, i64* }* @__llvm_profile_data_foo to i8*), i8* bitcast ({ i32, i32, i64, i8*, i64* }* @__llvm_profile_data_foo_weak to i8*), i8* bitcast ({ i32, i32, i64, i8*, i64* }* @__llvm_profile_data_main to i8*), i8* bitcast ({ i32, i32, i64, i8*, i64* }* @__llvm_profile_data_foo_internal to i8*)], section "llvm.metadata"
// CHECK: @__llvm_profile_runtime = external global i32
// CHECK: @__llvm_profile_counters__Z3foov = global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
// CHECK: @__llvm_profile_name__Z3foov = constant [7 x i8] c"_Z3foov", section "__DATA,__llvm_prf_names", align 1
-// CHECK: @__llvm_profile_data__Z3foov = constant { i32, i32, i64, i8*, i64* } { i32 7, i32 1, i64 1, i8* getelementptr inbounds ([7 x i8]* @__llvm_profile_name__Z3foov, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64]* @__llvm_profile_counters__Z3foov, i32 0, i32 0) }, section "__DATA,__llvm_prf_data", align 8
+// CHECK: @__llvm_profile_data__Z3foov = constant { i32, i32, i64, i8*, i64* } { i32 7, i32 1, i64 {{[0-9]+}}, i8* getelementptr inbounds ([7 x i8]* @__llvm_profile_name__Z3foov, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64]* @__llvm_profile_counters__Z3foov, i32 0, i32 0) }, section "__DATA,__llvm_prf_data", align 8
void foo(void) { }
// CHECK: @__llvm_profile_counters__Z8foo_weakv = weak global [5 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
// CHECK: @__llvm_profile_name__Z8foo_weakv = weak constant [12 x i8] c"_Z8foo_weakv", section "__DATA,__llvm_prf_names", align 1
-// CHECK: @__llvm_profile_data__Z8foo_weakv = weak constant { i32, i32, i64, i8*, i64* } { i32 12, i32 5, i64 5, i8* getelementptr inbounds ([12 x i8]* @__llvm_profile_name__Z8foo_weakv, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64]* @__llvm_profile_counters__Z8foo_weakv, i32 0, i32 0) }, section "__DATA,__llvm_prf_data", align 8
+// CHECK: @__llvm_profile_data__Z8foo_weakv = weak constant { i32, i32, i64, i8*, i64* } { i32 12, i32 5, i64 {{[0-9]+}}, i8* getelementptr inbounds ([12 x i8]* @__llvm_profile_name__Z8foo_weakv, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64]* @__llvm_profile_counters__Z8foo_weakv, i32 0, i32 0) }, section "__DATA,__llvm_prf_data", align 8
void foo_weak(void) __attribute__((weak));
void foo_weak(void) { if (0){} if (0){} if (0){} if (0){} }
// CHECK: @__llvm_profile_counters_main = global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
// CHECK: @__llvm_profile_name_main = constant [4 x i8] c"main", section "__DATA,__llvm_prf_names", align 1
-// CHECK: @__llvm_profile_data_main = constant { i32, i32, i64, i8*, i64* } { i32 4, i32 1, i64 1, i8* getelementptr inbounds ([4 x i8]* @__llvm_profile_name_main, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64]* @__llvm_profile_counters_main, i32 0, i32 0) }, section "__DATA,__llvm_prf_data", align 8
+// CHECK: @__llvm_profile_data_main = constant { i32, i32, i64, i8*, i64* } { i32 4, i32 1, i64 {{[0-9]+}}, i8* getelementptr inbounds ([4 x i8]* @__llvm_profile_name_main, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64]* @__llvm_profile_counters_main, i32 0, i32 0) }, section "__DATA,__llvm_prf_data", align 8
inline void foo_inline(void);
int main(void) {
foo();
// CHECK: @__llvm_profile_counters__Z10foo_inlinev = linkonce_odr global [7 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
// CHECK: @__llvm_profile_name__Z10foo_inlinev = linkonce_odr constant [15 x i8] c"_Z10foo_inlinev", section "__DATA,__llvm_prf_names", align 1
-// CHECK: @__llvm_profile_data__Z10foo_inlinev = linkonce_odr constant { i32, i32, i64, i8*, i64* } { i32 15, i32 7, i64 7, i8* getelementptr inbounds ([15 x i8]* @__llvm_profile_name__Z10foo_inlinev, i32 0, i32 0), i64* getelementptr inbounds ([7 x i64]* @__llvm_profile_counters__Z10foo_inlinev, i32 0, i32 0) }, section "__DATA,__llvm_prf_data", align 8
+// CHECK: @__llvm_profile_data__Z10foo_inlinev = linkonce_odr constant { i32, i32, i64, i8*, i64* } { i32 15, i32 7, i64 {{[0-9]+}}, i8* getelementptr inbounds ([15 x i8]* @__llvm_profile_name__Z10foo_inlinev, i32 0, i32 0), i64* getelementptr inbounds ([7 x i64]* @__llvm_profile_counters__Z10foo_inlinev, i32 0, i32 0) }, section "__DATA,__llvm_prf_data", align 8
inline void foo_inline(void) { if (0){} if (0){} if (0){} if (0){} if (0){} if (0){}}
// CHECK: @llvm.used = appending global [5 x i8*] [i8* bitcast (i32 ()* @__llvm_profile_runtime_user to i8*), i8* bitcast ({ i32, i32, i64, i8*, i64* }* @__llvm_profile_data__Z3foov to i8*), i8* bitcast ({ i32, i32, i64, i8*, i64* }* @__llvm_profile_data__Z8foo_weakv to i8*), i8* bitcast ({ i32, i32, i64, i8*, i64* }* @__llvm_profile_data_main to i8*), i8* bitcast ({ i32, i32, i64, i8*, i64* }* @__llvm_profile_data__Z10foo_inlinev to i8*)], section "llvm.metadata"