#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/Path.h"
using namespace clang;
CGM.getCXXABI().EmitGuardedInit(*this, D, DeclPtr, PerformInit);
}
+void CodeGenFunction::EmitCXXGuardedInitBranch(llvm::Value *NeedsInit,
+                                               llvm::BasicBlock *InitBlock,
+                                               llvm::BasicBlock *NoInitBlock,
+                                               GuardKind Kind,
+                                               const VarDecl *D) {
+  assert((Kind == GuardKind::TlsGuard || D) && "no guarded variable");
+
+  // Rough estimates of how often a guard is tested for each time the
+  // initializer behind it actually runs, per kind of guarded variable.
+  static const uint64_t InitsPerTLSVar = 1024;
+  static const uint64_t InitsPerLocalVar = 1024 * 1024;
+
+  // Non-local variables keep a null weight node, i.e. no weighting for now.
+  // Because of our use of COMDATs we expect at most one initialization of the
+  // variable per DSO, but we cannot know how many DSOs will try to
+  // initialize it.
+  llvm::MDNode *Weights = nullptr;
+  const bool IsNonLocalVariable =
+      Kind == GuardKind::VariableGuard && !D->isLocalVarDecl();
+
+  if (!IsNonLocalVariable) {
+    // FIXME: For the TLS case, collect and use profiling information to
+    // determine a more accurate branch weight.
+    const bool IsThreadLocal = Kind == GuardKind::TlsGuard || D->getTLSKind();
+    const uint64_t NumInits = IsThreadLocal ? InitsPerTLSVar : InitsPerLocalVar;
+
+    // We take the initializing edge with probability
+    // 1 / (total number of times we attempt to initialize the variable).
+    llvm::MDBuilder MDHelper(CGM.getLLVMContext());
+    Weights = MDHelper.createBranchWeights(1, NumInits - 1);
+  }
+
+  Builder.CreateCondBr(NeedsInit, InitBlock, NoInitBlock, Weights);
+}
+
llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction(
llvm::FunctionType *FTy, const Twine &Name, const CGFunctionInfo &FI,
SourceLocation Loc, bool TLS) {
"guard.uninitialized");
llvm::BasicBlock *InitBlock = createBasicBlock("init");
ExitBlock = createBasicBlock("exit");
- Builder.CreateCondBr(Uninit, InitBlock, ExitBlock);
+ EmitCXXGuardedInitBranch(Uninit, InitBlock, ExitBlock,
+ GuardKind::TlsGuard, nullptr);
EmitBlock(InitBlock);
// Mark as initialized before initializing anything else. If the
// initializers use previously-initialized thread_local vars, that's
void EmitCXXGuardedInit(const VarDecl &D, llvm::GlobalVariable *DeclPtr,
bool PerformInit);
+ /// Selects what kind of guard a guarded-initialization branch is testing.
+ /// In the visible callers, VariableGuard is passed together with the
+ /// guarded variable, while TlsGuard is passed with a null variable for the
+ /// "guard.uninitialized" test.
+ enum class GuardKind { VariableGuard, TlsGuard };
+
+ /// Emit a branch to select whether or not to perform guarded initialization.
+ ///
+ /// \param NeedsInit condition of the emitted conditional branch.
+ /// \param InitBlock successor taken when \p NeedsInit is true.
+ /// \param NoInitBlock successor taken when \p NeedsInit is false.
+ /// \param Kind the kind of guard being tested.
+ /// \param D the guarded variable; must be non-null unless \p Kind is
+ /// GuardKind::TlsGuard.
+ void EmitCXXGuardedInitBranch(llvm::Value *NeedsInit,
+ llvm::BasicBlock *InitBlock,
+ llvm::BasicBlock *NoInitBlock,
+ GuardKind Kind, const VarDecl *D);
+
/// GenerateCXXGlobalInitFunc - Generates code for initializing global
/// variables.
void GenerateCXXGlobalInitFunc(llvm::Function *Fn,
(UseARMGuardVarABI && !useInt8GuardVariable)
? Builder.CreateAnd(LI, llvm::ConstantInt::get(CGM.Int8Ty, 1))
: LI;
- llvm::Value *isInitialized = Builder.CreateIsNull(V, "guard.uninitialized");
+ llvm::Value *NeedsInit = Builder.CreateIsNull(V, "guard.uninitialized");
llvm::BasicBlock *InitCheckBlock = CGF.createBasicBlock("init.check");
llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end");
// Check if the first byte of the guard variable is zero.
- Builder.CreateCondBr(isInitialized, InitCheckBlock, EndBlock);
+ CGF.EmitCXXGuardedInitBranch(NeedsInit, InitCheckBlock, EndBlock,
+ CodeGenFunction::GuardKind::VariableGuard, &D);
CGF.EmitBlock(InitCheckBlock);
// Test our bit from the guard variable.
llvm::ConstantInt *Bit = llvm::ConstantInt::get(GuardTy, 1ULL << GuardNum);
llvm::LoadInst *LI = Builder.CreateLoad(GuardAddr);
- llvm::Value *IsInitialized =
- Builder.CreateICmpNE(Builder.CreateAnd(LI, Bit), Zero);
+ llvm::Value *NeedsInit =
+ Builder.CreateICmpEQ(Builder.CreateAnd(LI, Bit), Zero);
llvm::BasicBlock *InitBlock = CGF.createBasicBlock("init");
llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end");
- Builder.CreateCondBr(IsInitialized, EndBlock, InitBlock);
+ CGF.EmitCXXGuardedInitBranch(NeedsInit, InitBlock, EndBlock,
+ CodeGenFunction::GuardKind::VariableGuard, &D);
// Set our bit in the guard variable and emit the initializer and add a global
// destructor if appropriate.
Builder.CreateICmpSGT(FirstGuardLoad, InitThreadEpoch);
llvm::BasicBlock *AttemptInitBlock = CGF.createBasicBlock("init.attempt");
llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end");
- Builder.CreateCondBr(IsUninitialized, AttemptInitBlock, EndBlock);
+ CGF.EmitCXXGuardedInitBranch(IsUninitialized, AttemptInitBlock, EndBlock,
+ CodeGenFunction::GuardKind::VariableGuard, &D);
// This BasicBlock attempts to determine whether or not this thread is
// responsible for doing the initialization.
// CHECK-LABEL: define linkonce_odr dereferenceable({{[0-9]+}}) %struct.S* @"\01?getS@@YAAAUS@@XZ"() {{.*}} comdat
// CHECK: load i32, i32* @"\01??_B?1??getS@@YAAAUS@@XZ@51"
// CHECK: and i32 {{.*}}, 1
-// CHECK: icmp ne i32 {{.*}}, 0
+// CHECK: icmp eq i32 {{.*}}, 0
// CHECK: br i1
// init:
// CHECK: or i32 {{.*}}, 1
static thread_local S s;
// CHECK: %[[guard:.*]] = load i32, i32* @"\01??__J?1??f@@YAAAUS@@XZ@51"
// CHECK-NEXT: %[[mask:.*]] = and i32 %[[guard]], 1
-// CHECK-NEXT: %[[cmp:.*]] = icmp ne i32 %[[mask]], 0
-// CHECK-NEXT: br i1 %[[cmp]], label %[[init_end:.*]], label %[[init:.*]]
+// CHECK-NEXT: %[[cmp:.*]] = icmp eq i32 %[[mask]], 0
+// CHECK-NEXT: br i1 %[[cmp]], label %[[init:.*]], label %[[init_end:.*]], !prof ![[unlikely_threadlocal:.*]]
//
// CHECK: [[init]]:
// CHECK-NEXT: %[[or:.*]] = or i32 %[[guard]], 1
// CHECK: %[[guard:.*]] = load atomic i32, i32* @"\01?$TSS0@?1??g@@YAAAUS@@XZ@4HA" unordered, align 4
// CHECK-NEXT: %[[epoch:.*]] = load i32, i32* @_Init_thread_epoch
// CHECK-NEXT: %[[cmp:.*]] = icmp sgt i32 %[[guard]], %[[epoch]]
-// CHECK-NEXT: br i1 %[[cmp]], label %[[init_attempt:.*]], label %[[init_end:.*]]
+// CHECK-NEXT: br i1 %[[cmp]], label %[[init_attempt:.*]], label %[[init_end:.*]], !prof ![[unlikely_staticlocal:.*]]
//
// CHECK: [[init_attempt]]:
// CHECK-NEXT: call void @_Init_thread_header(i32* @"\01?$TSS0@?1??g@@YAAAUS@@XZ@4HA")
static int i = f1();
return i;
}
+
+// CHECK-DAG: ![[unlikely_threadlocal]] = !{!"branch_weights", i32 1, i32 1023}
+// CHECK-DAG: ![[unlikely_staticlocal]] = !{!"branch_weights", i32 1, i32 1048575}
// WEBASSEMBLY32: %[[R0:.+]] = load atomic i8, i8* bitcast (i32* @_ZGVZ1gvE1a to i8*) acquire, align 4
// WEBASSEMBLY32-NEXT: %[[R1:.+]] = and i8 %[[R0]], 1
// WEBASSEMBLY32-NEXT: %[[R2:.+]] = icmp eq i8 %[[R1]], 0
-// WEBASSEMBLY32-NEXT: br i1 %[[R2]], label %[[CHECK:.+]], label %[[END:.+]]
+// WEBASSEMBLY32-NEXT: br i1 %[[R2]], label %[[CHECK:.+]], label %[[END:.+]],
// WEBASSEMBLY32: [[CHECK]]
// WEBASSEMBLY32: call i32 @__cxa_guard_acquire
// WEBASSEMBLY32: [[END]]
// WEBASSEMBLY64: %[[R0:.+]] = load atomic i8, i8* bitcast (i64* @_ZGVZ1gvE1a to i8*) acquire, align 8
// WEBASSEMBLY64-NEXT: %[[R1:.+]] = and i8 %[[R0]], 1
// WEBASSEMBLY64-NEXT: %[[R2:.+]] = icmp eq i8 %[[R1]], 0
-// WEBASSEMBLY64-NEXT: br i1 %[[R2]], label %[[CHECK:.+]], label %[[END:.+]]
+// WEBASSEMBLY64-NEXT: br i1 %[[R2]], label %[[CHECK:.+]], label %[[END:.+]],
// WEBASSEMBLY64: [[CHECK]]
// WEBASSEMBLY64: call i32 @__cxa_guard_acquire
// WEBASSEMBLY64: [[END]]
--- /dev/null
+// RUN: %clang_cc1 -emit-llvm -std=c++1z %s -o - -triple=x86_64-linux-gnu | FileCheck %s
+
+struct S { S(); ~S(); }; // declares its own constructor and destructor; the directives in this file expect a call to S's constructor for each variable
+
+// CHECK-LABEL: define {{.*}}global_var_init
+// CHECK-NOT: br
+// CHECK: call void @_ZN1SC1Ev({{.*}}* @global)
+S global;
+
+// CHECK-LABEL: define {{.*}}global_var_init
+// FIXME: Do we really need thread-safe initialization here? We don't run
+// global ctors on multiple threads. (If we were to do so, we'd need thread-safe
+// init for B<int>::member and B<int>::inline_member too.)
+// CHECK: load atomic i8, i8* bitcast (i64* @_ZGV13inline_global to i8*) acquire,
+// CHECK: icmp eq i8 {{.*}}, 0
+// CHECK: br i1
+// CHECK-NOT: !prof
+// CHECK: call void @_ZN1SC1Ev({{.*}}* @inline_global)
+inline S inline_global;
+
+// CHECK-LABEL: define {{.*}}global_var_init
+// CHECK-NOT: br
+// CHECK: call void @_ZN1SC1Ev({{.*}}* @thread_local_global)
+thread_local S thread_local_global;
+
+// CHECK-LABEL: define {{.*}}global_var_init
+// CHECK: load i8, i8* bitcast (i64* @_ZGV26thread_local_inline_global to i8*)
+// CHECK: icmp eq i8 {{.*}}, 0
+// CHECK: br i1
+// CHECK-NOT: !prof
+// CHECK: call void @_ZN1SC1Ev({{.*}}* @thread_local_inline_global)
+thread_local inline S thread_local_inline_global;
+
+struct A { // non-template class whose static data members exercise guards that are expected to carry no branch weights
+ static S member; // defined out of line later in this file
+ static thread_local S thread_local_member; // defined out of line later in this file
+
+ // CHECK-LABEL: define {{.*}}global_var_init
+ // CHECK: load atomic i8, i8* bitcast (i64* @_ZGVN1A13inline_memberE to i8*) acquire,
+ // CHECK: icmp eq i8 {{.*}}, 0
+ // CHECK: br i1
+ // CHECK-NOT: !prof
+ // CHECK: call void @_ZN1SC1Ev({{.*}}* @_ZN1A13inline_memberE)
+ static inline S inline_member; // expected: atomic acquire load of the guard, then an unweighted branch
+
+ // CHECK-LABEL: define {{.*}}global_var_init
+ // CHECK: load i8, i8* bitcast (i64* @_ZGVN1A26thread_local_inline_memberE to i8*)
+ // CHECK: icmp eq i8 {{.*}}, 0
+ // CHECK: br i1
+ // CHECK-NOT: !prof
+ // CHECK: call void @_ZN1SC1Ev({{.*}}* @_ZN1A26thread_local_inline_memberE)
+ static thread_local inline S thread_local_inline_member; // expected: plain load of the guard, then an unweighted branch
+};
+
+// CHECK-LABEL: define void @_Z1fv()
+void f() { // function-local statics: the variable guards in this file that are expected to carry branch weights
+ // CHECK: load atomic i8, i8* bitcast (i64* @_ZGVZ1fvE12static_local to i8*) acquire,
+ // CHECK: icmp eq i8 {{.*}}, 0
+ // CHECK: br i1 {{.*}}, !prof ![[WEIGHTS_LOCAL:[0-9]*]]
+ static S static_local; // expected weights 1 : 1048575 (see the end of the file)
+
+ // CHECK: load i8, i8* @_ZGVZ1fvE19static_thread_local,
+ // CHECK: icmp eq i8 {{.*}}, 0
+ // CHECK: br i1 {{.*}}, !prof ![[WEIGHTS_THREAD_LOCAL:[0-9]*]]
+ static thread_local S static_thread_local; // expected weights 1 : 1023 (see the end of the file)
+}
+
+// CHECK-LABEL: define {{.*}}global_var_init
+// CHECK-NOT: br
+// CHECK: call void @_ZN1SC1Ev({{.*}}* @_ZN1A6memberE)
+S A::member;
+
+// CHECK-LABEL: define {{.*}}global_var_init
+// CHECK-NOT: br
+// CHECK: call void @_ZN1SC1Ev({{.*}}* @_ZN1A19thread_local_memberE)
+thread_local S A::thread_local_member;
+
+template <typename T> struct B { // class template; every static member of B<int> is expected to get a guard with an unweighted branch
+ // CHECK-LABEL: define {{.*}}global_var_init
+ // CHECK: load i8, i8* bitcast (i64* @_ZGVN1BIiE6memberE to i8*)
+ // CHECK: icmp eq i8 {{.*}}, 0
+ // CHECK: br i1
+ // CHECK-NOT: !prof
+ // CHECK: call void @_ZN1SC1Ev({{.*}}* @_ZN1BIiE6memberE)
+ static S member; // defined out of line after the class template
+
+ // CHECK-LABEL: define {{.*}}global_var_init
+ // CHECK: load i8, i8* bitcast (i64* @_ZGVN1BIiE13inline_memberE to i8*)
+ // CHECK: icmp eq i8 {{.*}}, 0
+ // CHECK: br i1
+ // CHECK-NOT: !prof
+ // CHECK: call void @_ZN1SC1Ev({{.*}}* @_ZN1BIiE13inline_memberE)
+ static inline S inline_member; // inline member, defined here
+
+ // CHECK-LABEL: define {{.*}}global_var_init
+ // CHECK: load i8, i8* bitcast (i64* @_ZGVN1BIiE19thread_local_memberE to i8*)
+ // CHECK: icmp eq i8 {{.*}}, 0
+ // CHECK: br i1
+ // CHECK-NOT: !prof
+ // CHECK: call void @_ZN1SC1Ev({{.*}}* @_ZN1BIiE19thread_local_memberE)
+ static thread_local S thread_local_member; // defined out of line after the class template
+
+ // CHECK-LABEL: define {{.*}}global_var_init
+ // CHECK: load i8, i8* bitcast (i64* @_ZGVN1BIiE26thread_local_inline_memberE to i8*)
+ // CHECK: icmp eq i8 {{.*}}, 0
+ // CHECK: br i1
+ // CHECK-NOT: !prof
+ // CHECK: call void @_ZN1SC1Ev({{.*}}* @_ZN1BIiE26thread_local_inline_memberE)
+ static thread_local inline S thread_local_inline_member; // inline member, defined here
+};
+template<typename T> S B<T>::member;
+template<typename T> thread_local S B<T>::thread_local_member;
+
+template<typename ...T> void use(T &...);
+void use_b() { // names every static data member of B<int>, presumably to force their instantiation so the initializers above are emitted
+ use(B<int>::member, B<int>::inline_member, B<int>::thread_local_member,
+ B<int>::thread_local_inline_member);
+}
+
+// CHECK-LABEL: define {{.*}}tls_init()
+// CHECK: load i8, i8* @__tls_guard, align 1
+// CHECK: icmp eq i8 {{.*}}, 0
+// CHECK: br i1 {{.*}}, !prof ![[WEIGHTS_THREAD_LOCAL]]
+
+// CHECK-DAG: ![[WEIGHTS_THREAD_LOCAL]] = !{!"branch_weights", i32 1, i32 1023}
+// CHECK-DAG: ![[WEIGHTS_LOCAL]] = !{!"branch_weights", i32 1, i32 1048575}