const FieldDecl *lookup(const VarDecl *VD) const override {
if (OuterRegionInfo)
return OuterRegionInfo->lookup(VD);
- llvm_unreachable("Trying to reference VarDecl that is neither local nor "
- "captured in outer OpenMP region");
+ // If there is no outer outlined region,no need to lookup in a list of
+ // captured variables, we can use the original one.
+ return nullptr;
}
FieldDecl *getThisFieldDecl() const override {
if (OuterRegionInfo)
}
}
-void CodeGenFunction::EmitOMPFirstprivateClause(
- const OMPExecutableDirective &D,
- CodeGenFunction::OMPPrivateScope &PrivateScope) {
- auto PrivateFilter = [](const OMPClause *C) -> bool {
+bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
+ OMPPrivateScope &PrivateScope) {
+ auto FirstprivateFilter = [](const OMPClause *C) -> bool {
return C->getClauseKind() == OMPC_firstprivate;
};
- for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
- I(D.clauses(), PrivateFilter); I; ++I) {
+ llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
+ for (OMPExecutableDirective::filtered_clause_iterator<decltype(
+ FirstprivateFilter)> I(D.clauses(), FirstprivateFilter);
+ I; ++I) {
auto *C = cast<OMPFirstprivateClause>(*I);
auto IRef = C->varlist_begin();
auto InitsRef = C->inits().begin();
for (auto IInit : C->private_copies()) {
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
- auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
- bool IsRegistered;
- if (*InitsRef != nullptr) {
- // Emit VarDecl with copy init for arrays.
- auto *FD = CapturedStmtInfo->lookup(OrigVD);
- LValue Base = MakeNaturalAlignAddrLValue(
- CapturedStmtInfo->getContextValue(),
- getContext().getTagDeclType(FD->getParent()));
- auto *OriginalAddr = EmitLValueForField(Base, FD).getAddress();
- auto VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
- IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{
- auto Emission = EmitAutoVarAlloca(*VD);
- // Emit initialization of aggregate firstprivate vars.
- auto *Init = VD->getInit();
- if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
- // Perform simple memcpy.
- EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
- (*IRef)->getType());
- } else {
- EmitOMPAggregateAssign(
- Emission.getAllocatedAddress(), OriginalAddr,
- (*IRef)->getType(),
- [this, VDInit, Init](llvm::Value *DestElement,
- llvm::Value *SrcElement) {
- // Clean up any temporaries needed by the initialization.
- RunCleanupsScope InitScope(*this);
- // Emit initialization for single element.
- LocalDeclMap[VDInit] = SrcElement;
- EmitAnyExprToMem(Init, DestElement,
- Init->getType().getQualifiers(),
- /*IsInitializer*/ false);
- LocalDeclMap.erase(VDInit);
- });
- }
- EmitAutoVarCleanups(Emission);
- return Emission.getAllocatedAddress();
- });
- } else
- IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{
- // Emit private VarDecl with copy init.
- EmitDecl(*VD);
- return GetAddrOfLocalVar(VD);
- });
- assert(IsRegistered && "firstprivate var already registered as private");
- // Silence the warning about unused variable.
- (void)IsRegistered;
+ if (EmittedAsFirstprivate.count(OrigVD) == 0) {
+ EmittedAsFirstprivate.insert(OrigVD);
+ auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
+ auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
+ bool IsRegistered;
+ DeclRefExpr DRE(
+ const_cast<VarDecl *>(OrigVD),
+ /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
+ OrigVD) != nullptr,
+ (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
+ auto *OriginalAddr = EmitLValue(&DRE).getAddress();
+ if (OrigVD->getType()->isArrayType()) {
+ // Emit VarDecl with copy init for arrays.
+ // Get the address of the original variable captured in current
+ // captured region.
+ IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{
+ auto Emission = EmitAutoVarAlloca(*VD);
+ auto *Init = VD->getInit();
+ if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
+ // Perform simple memcpy.
+ EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
+ (*IRef)->getType());
+ } else {
+ EmitOMPAggregateAssign(
+ Emission.getAllocatedAddress(), OriginalAddr,
+ (*IRef)->getType(),
+ [this, VDInit, Init](llvm::Value *DestElement,
+ llvm::Value *SrcElement) {
+ // Clean up any temporaries needed by the initialization.
+ RunCleanupsScope InitScope(*this);
+ // Emit initialization for single element.
+ LocalDeclMap[VDInit] = SrcElement;
+ EmitAnyExprToMem(Init, DestElement,
+ Init->getType().getQualifiers(),
+ /*IsInitializer*/ false);
+ LocalDeclMap.erase(VDInit);
+ });
+ }
+ EmitAutoVarCleanups(Emission);
+ return Emission.getAllocatedAddress();
+ });
+ } else {
+ IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{
+ // Emit private VarDecl with copy init.
+ // Remap temp VDInit variable to the address of the original
+ // variable
+ // (for proper handling of captured global variables).
+ LocalDeclMap[VDInit] = OriginalAddr;
+ EmitDecl(*VD);
+ LocalDeclMap.erase(VDInit);
+ return GetAddrOfLocalVar(VD);
+ });
+ }
+ assert(IsRegistered &&
+ "firstprivate var already registered as private");
+ // Silence the warning about unused variable.
+ (void)IsRegistered;
+ }
++IRef, ++InitsRef;
}
}
+ return !EmittedAsFirstprivate.empty();
}
void CodeGenFunction::EmitOMPPrivateClause(
// Emit parallel region as a standalone region.
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
OMPPrivateScope PrivateScope(CGF);
- CGF.EmitOMPPrivateClause(S, PrivateScope);
- CGF.EmitOMPFirstprivateClause(S, PrivateScope);
- CGF.EmitOMPReductionClauseInit(S, PrivateScope);
- if (PrivateScope.Privatize())
- // Emit implicit barrier to synchronize threads and avoid data races.
+ if (CGF.EmitOMPFirstprivateClause(S, PrivateScope)) {
+ // Emit implicit barrier to synchronize threads and avoid data races on
+ // initialization of firstprivate variables.
CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
OMPD_unknown);
+ }
+ CGF.EmitOMPPrivateClause(S, PrivateScope);
+ CGF.EmitOMPReductionClauseInit(S, PrivateScope);
+ (void)PrivateScope.Privatize();
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
CGF.EmitOMPReductionClauseFinal(S);
// Emit implicit barrier at the end of the 'parallel' directive.
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
OMPPrivateScope LoopScope(*this);
+ if (EmitOMPFirstprivateClause(S, LoopScope)) {
+ // Emit implicit barrier to synchronize threads and avoid data races on
+ // initialization of firstprivate variables.
+ CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
+ OMPD_unknown);
+ }
EmitPrivateLoopCounters(*this, LoopScope, S.counters());
(void)LoopScope.Privatize();
LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
llvm::AtomicOrdering AO, SourceLocation Loc,
const llvm::function_ref<RValue(RValue)> &CommonGen);
- void EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
+ bool EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
OMPPrivateScope &PrivateScope);
void EmitOMPPrivateClause(const OMPExecutableDirective &D,
OMPPrivateScope &PrivateScope);
// A variable of class type (or array thereof) that appears in a private
// clause requires an accessible, unambiguous copy constructor for the
// class type.
- Type = Context.getBaseElementType(Type);
+ Type = Context.getBaseElementType(Type).getNonReferenceType();
// If an implicit firstprivate variable found it was checked already.
if (!IsImplicitClause) {
}
}
- Type = Type.getUnqualifiedType();
- auto VDPrivate = VarDecl::Create(Context, CurContext, DE->getLocStart(),
- ELoc, VD->getIdentifier(), VD->getType(),
- VD->getTypeSourceInfo(), /*S*/ SC_Auto);
+ auto VDPrivate =
+ VarDecl::Create(Context, CurContext, DE->getLocStart(), ELoc,
+ VD->getIdentifier(), VD->getType().getUnqualifiedType(),
+ VD->getTypeSourceInfo(), /*S*/ SC_Auto);
// Generate helper private variable and initialize it with the value of the
// original variable. The address of the original variable is replaced by
// the address of the new private variable in the CodeGen. This new variable
/*TemplateKWLoc*/ SourceLocation(), VDInit,
/*RefersToEnclosingVariableOrCapture*/ true, ELoc, Type,
/*VK*/ VK_LValue);
- VDInit->setIsUsed();
auto Init = DefaultLvalueConversion(VDInitRefExpr).get();
- InitializedEntity Entity = InitializedEntity::InitializeVariable(VDInit);
+ auto *VDInitTemp =
+ BuildVarDecl(*this, DE->getLocStart(), Type.getUnqualifiedType(),
+ ".firstprivate.temp");
+ InitializedEntity Entity =
+ InitializedEntity::InitializeVariable(VDInitTemp);
InitializationKind Kind = InitializationKind::CreateCopy(ELoc, ELoc);
InitializationSequence InitSeq(*this, Entity, Kind, Init);
else
VDPrivate->setInit(Result.getAs<Expr>());
} else {
- AddInitializerToDecl(
- VDPrivate,
- DefaultLvalueConversion(
- DeclRefExpr::Create(Context, NestedNameSpecifierLoc(),
- SourceLocation(), DE->getDecl(),
- /*RefersToEnclosingVariableOrCapture=*/true,
- DE->getExprLoc(), DE->getType(),
- /*VK=*/VK_LValue)).get(),
- /*DirectInit=*/false, /*TypeMayContainAuto=*/false);
+ auto *VDInit =
+ BuildVarDecl(*this, DE->getLocStart(), Type, ".firstprivate.temp");
+ VDInitRefExpr =
+ BuildDeclRefExpr(VDInit, Type, VK_LValue, DE->getExprLoc()).get();
+ AddInitializerToDecl(VDPrivate,
+ DefaultLvalueConversion(VDInitRefExpr).get(),
+ /*DirectInit=*/false, /*TypeMayContainAuto=*/false);
}
if (VDPrivate->isInvalidDecl()) {
if (IsImplicitClause) {
continue;
}
CurContext->addDecl(VDPrivate);
- auto VDPrivateRefExpr =
- DeclRefExpr::Create(Context, /*QualifierLoc*/ NestedNameSpecifierLoc(),
- /*TemplateKWLoc*/ SourceLocation(), VDPrivate,
- /*RefersToEnclosingVariableOrCapture*/ false,
- DE->getLocStart(), DE->getType(),
- /*VK*/ VK_LValue);
+ auto VDPrivateRefExpr = DeclRefExpr::Create(
+ Context, /*QualifierLoc*/ NestedNameSpecifierLoc(),
+ /*TemplateKWLoc*/ SourceLocation(), VDPrivate,
+ /*RefersToEnclosingVariableOrCapture*/ false, DE->getLocStart(),
+ DE->getType().getUnqualifiedType(), /*VK*/ VK_LValue);
DSAStack->addDSA(VD, DE, OMPC_firstprivate);
Vars.push_back(DE);
PrivateCopies.push_back(VDPrivateRefExpr);
--- /dev/null
+// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -std=c++11 -DLAMBDA -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
+// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -fblocks -DBLOCKS -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+struct St {
+ int a, b;
+ St() : a(0), b(0) {}
+ St(const St &st) : a(st.a + st.b), b(0) {}
+ ~St() {}
+};
+
+volatile int g = 1212;
+
+template <class T>
+struct S {
+ T f;
+ S(T a) : f(a + g) {}
+ S() : f(g) {}
+ S(const S &s, St t = St()) : f(s.f + t.a) {}
+ operator T() { return T(); }
+ ~S() {}
+};
+
+// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+// CHECK-DAG: [[ST_TY:%.+]] = type { i{{[0-9]+}}, i{{[0-9]+}} }
+// CHECK-DAG: [[CAP_TMAIN_TY:%.+]] = type { i{{[0-9]+}}*, [2 x i{{[0-9]+}}]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]* }
+
+template <typename T>
+T tmain() {
+ S<T> test;
+ T t_var = T();
+ T vec[] = {1, 2};
+ S<T> s_arr[] = {1, 2};
+ S<T> var(3);
+#pragma omp parallel
+#pragma omp for firstprivate(t_var, vec, s_arr, var)
+ for (int i = 0; i < 0; ++i) {
+ vec[i] = t_var;
+ s_arr[i] = var;
+ }
+ return T();
+}
+
+// CHECK: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> test;
+// CHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333,
+int t_var = 333;
+// CHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2],
+int vec[] = {1, 2};
+// CHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer,
+S<float> s_arr[] = {1, 2};
+// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> var(3);
+// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8*
+
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
+// CHECK: ([[S_FLOAT_TY]]*)* [[S_FLOAT_TY_DESTR:@[^ ]+]] {{[^,]+}}, {{.+}}([[S_FLOAT_TY]]* [[TEST]]
+int main() {
+#ifdef LAMBDA
+ // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
+ // LAMBDA-LABEL: @main
+ // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
+ [&]() {
+// LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+// LAMBDA: call void {{.+}}* @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* %{{.+}})
+#pragma omp parallel
+#pragma omp for firstprivate(g)
+ for (int i = 0; i < 2; ++i) {
+ // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* %{{.+}}, i32* %{{.+}}, %{{.+}}* [[ARG:%.+]])
+ // Skip temp vars for loop
+ // LAMBDA: alloca i{{[0-9]+}},
+ // LAMBDA: alloca i{{[0-9]+}},
+ // LAMBDA: alloca i{{[0-9]+}},
+ // LAMBDA: alloca i{{[0-9]+}},
+ // LAMBDA: alloca i{{[0-9]+}},
+ // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
+ // LAMBDA: [[G_VAL:%.+]] = load volatile i{{[0-9]+}}, i{{[0-9]+}}* [[G]]
+ // LAMBDA: store i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G_PRIVATE_ADDR]]
+ // LAMBDA: call i32 @__kmpc_cancel_barrier(
+ g = 1;
+ // LAMBDA: call void @__kmpc_for_static_init_4(
+ // LAMBDA: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
+ // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+ // LAMBDA: store i{{[0-9]+}}* [[G_PRIVATE_ADDR]], i{{[0-9]+}}** [[G_PRIVATE_ADDR_REF]]
+ // LAMBDA: call void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
+ // LAMBDA: call void @__kmpc_for_static_fini(
+ // LAMBDA: call i32 @__kmpc_cancel_barrier(
+ [&]() {
+ // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+ // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+ g = 2;
+ // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+ // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+ // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]]
+ // LAMBDA: store volatile i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]]
+ }();
+ }
+ }();
+ return 0;
+#elif defined(BLOCKS)
+ // BLOCKS: [[G:@.+]] = global i{{[0-9]+}} 1212,
+ // BLOCKS-LABEL: @main
+ // BLOCKS: call void {{%.+}}(i8*
+ ^{
+// BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8*
+// BLOCKS: call void {{.+}}* @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* %{{.+}})
+#pragma omp parallel
+#pragma omp for firstprivate(g)
+ for (int i = 0; i < 2; ++i) {
+ // BLOCKS: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* %{{.+}}, i32* %{{.+}}, %{{.+}}* [[ARG:%.+]])
+ // Skip temp vars for loop
+ // BLOCKS: alloca i{{[0-9]+}},
+ // BLOCKS: alloca i{{[0-9]+}},
+ // BLOCKS: alloca i{{[0-9]+}},
+ // BLOCKS: alloca i{{[0-9]+}},
+ // BLOCKS: alloca i{{[0-9]+}},
+ // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
+ // BLOCKS: [[G_VAL:%.+]] = load volatile i{{[0-9]+}}, i{{[0-9]+}}* [[G]]
+ // BLOCKS: store i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G_PRIVATE_ADDR]]
+ // BLOCKS: call i32 @__kmpc_cancel_barrier(
+ g = 1;
+ // BLOCKS: call void @__kmpc_for_static_init_4(
+ // BLOCKS: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
+ // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
+ // BLOCKS: i{{[0-9]+}}* [[G_PRIVATE_ADDR]]
+ // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
+ // BLOCKS: call void {{%.+}}(i8*
+ // BLOCKS: call void @__kmpc_for_static_fini(
+ // BLOCKS: call i32 @__kmpc_cancel_barrier(
+ ^{
+ // BLOCKS: define {{.+}} void {{@.+}}(i8*
+ g = 2;
+ // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
+ // BLOCKS: store volatile i{{[0-9]+}} 2, i{{[0-9]+}}*
+ // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
+ // BLOCKS: ret
+ }();
+ }
+ }();
+ return 0;
+#else
+#pragma omp for firstprivate(t_var, vec, s_arr, var)
+ for (int i = 0; i < 0; ++i) {
+ vec[i] = t_var;
+ s_arr[i] = var;
+ }
+ return tmain<int>();
+#endif
+}
+
+// CHECK: define {{.*}}i{{[0-9]+}} @main()
+// CHECK: alloca i{{[0-9]+}},
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(
+
+// firstprivate t_var(t_var)
+// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR]],
+// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_PRIV]],
+
+// firstprivate vec(vec)
+// CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*),
+
+// firstprivate s_arr(s_arr)
+// CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2
+// CHECK: [[IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], [[S_ARR_PRIV_END]]
+// CHECK: br i1 [[IS_EMPTY]], label %[[S_ARR_BODY_DONE:.+]], label %[[S_ARR_BODY:.+]]
+// CHECK: [[S_ARR_BODY]]
+// CHECK: getelementptr inbounds ([2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0)
+// CHECK: call {{.*}} [[ST_TY_DEFAULT_CONSTR:@.+]]([[ST_TY]]* [[ST_TY_TEMP:%.+]])
+// CHECK: call {{.*}} [[S_FLOAT_TY_COPY_CONSTR:@.+]]([[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, [[ST_TY]]* [[ST_TY_TEMP]])
+// CHECK: call {{.*}} [[ST_TY_DESTR:@.+]]([[ST_TY]]* [[ST_TY_TEMP]])
+// CHECK: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]]
+
+// firstprivate var(var)
+// CHECK: call {{.*}} [[ST_TY_DEFAULT_CONSTR]]([[ST_TY]]* [[ST_TY_TEMP:%.+]])
+// CHECK: call {{.*}} [[S_FLOAT_TY_COPY_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]* {{.*}} [[VAR]], [[ST_TY]]* [[ST_TY_TEMP]])
+// CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]])
+
+// Synchronization for initialization.
+// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void @__kmpc_for_static_fini(
+
+// ~(firstprivate var), ~(firstprivate s_arr)
+// CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
+// CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]*
+// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
+
+// CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]()
+
+// CHECK: ret void
+
+// CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]()
+// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...)* @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[CAP_TMAIN_TY]]*)* [[TMAIN_MICROTASK:@.+]] to void
+// CHECK: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]*
+// CHECK: ret
+//
+// CHECK: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* [[GTID_ADDR:%.+]], i{{[0-9]+}}* %{{.+}}, [[CAP_TMAIN_TY]]* %{{.+}})
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
+
+// firstprivate t_var(t_var)
+// CHECK: [[T_VAR_PTR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[T_VAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_PTR_REF]],
+// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_REF]],
+// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_PRIV]],
+
+// firstprivate vec(vec)
+// CHECK: [[VEC_PTR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1
+// CHECK: [[VEC_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_PTR_REF:%.+]],
+// CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* [[VEC_SRC]],
+
+// firstprivate s_arr(s_arr)
+// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2
+// CHECK: [[S_ARR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_REF]],
+// CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2
+// CHECK: [[IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_PRIV_BEGIN]], [[S_ARR_PRIV_END]]
+// CHECK: br i1 [[IS_EMPTY]], label %[[S_ARR_BODY_DONE:.+]], label %[[S_ARR_BODY:.+]]
+// CHECK: [[S_ARR_BODY]]
+// CHECK: call {{.*}} [[ST_TY_DEFAULT_CONSTR:@.+]]([[ST_TY]]* [[ST_TY_TEMP:%.+]])
+// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR:@.+]]([[S_INT_TY]]* {{.+}}, [[S_INT_TY]]* {{.+}}, [[ST_TY]]* [[ST_TY_TEMP]])
+// CHECK: call {{.*}} [[ST_TY_DESTR:@.+]]([[ST_TY]]* [[ST_TY_TEMP]])
+// CHECK: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]]
+
+// firstprivate var(var)
+// CHECK: [[VAR_REF_PTR:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3
+// CHECK: [[VAR_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_REF_PTR]],
+// CHECK: call {{.*}} [[ST_TY_DEFAULT_CONSTR]]([[ST_TY]]* [[ST_TY_TEMP:%.+]])
+// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]])
+// CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]])
+
+// Synchronization for initialization.
+// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
+// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
+// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void @__kmpc_for_static_fini(
+
+// ~(firstprivate var), ~(firstprivate s_arr)
+// CHECK-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]])
+// CHECK-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]*
+// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
+// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
+// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
+// CHECK: ret void
+#endif
+
S3 &operator=(const S3 &s3);
public:
- S3() : a(0) {} // expected-note {{candidate constructor not viable: requires 0 arguments, but 1 was provided}}
- S3(S3 &s3) : a(s3.a) {} // expected-note {{candidate constructor not viable: 1st argument ('const S3') would lose const qualifier}}
+ S3() : a(0) {} // expected-note 2 {{candidate constructor not viable: requires 0 arguments, but 1 was provided}}
+ S3(S3 &s3) : a(s3.a) {} // expected-note 2 {{candidate constructor not viable: 1st argument ('const S3') would lose const qualifier}}
};
const S3 c;
const S3 ca[5];
for (i = 0; i < argc; ++i)
foo();
#pragma omp parallel
-#pragma omp for firstprivate(a, b, c, d, f) // expected-error {{firstprivate variable with incomplete type 'S1'}} expected-error {{no matching constructor for initialization of 'const S3'}}
+#pragma omp for firstprivate(a, b, c, d, f) // expected-error {{firstprivate variable with incomplete type 'S1'}} expected-error {{no matching constructor for initialization of 'S3'}}
for (i = 0; i < argc; ++i)
foo();
#pragma omp parallel
for (i = 0; i < argc; ++i)
foo();
#pragma omp parallel
-#pragma omp for firstprivate(ca) // OK
+#pragma omp for firstprivate(ca) // expected-error {{no matching constructor for initialization of 'S3'}}
for (i = 0; i < argc; ++i)
foo();
#pragma omp parallel
// LAMBDA: [[G_REF_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
// LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_REF_ADDR]]
// LAMBDA: [[G_VAL:%.+]] = load volatile i{{[0-9]+}}, i{{[0-9]+}}* [[G_REF]]
- // LAMBDA: store volatile i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G_PRIVATE_ADDR]]
+ // LAMBDA: store i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G_PRIVATE_ADDR]]
// LAMBDA: call i32 @__kmpc_cancel_barrier(
g = 1;
// LAMBDA: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
// BLOCKS: [[G_REF_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
// BLOCKS: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_REF_ADDR]]
// BLOCKS: [[G_VAL:%.+]] = load volatile i{{[0-9]+}}, i{{[0-9]+}}* [[G_REF]]
- // BLOCKS: store volatile i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G_PRIVATE_ADDR]]
+ // BLOCKS: store i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G_PRIVATE_ADDR]]
// BLOCKS: call i32 @__kmpc_cancel_barrier(
g = 1;
// BLOCKS: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
// CHECK: [[CAP_MAIN_TY:%.+]] = type { [2 x i{{[0-9]+}}]*, i{{[0-9]+}}*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]* }
// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
// CHECK: [[CAP_TMAIN_TY:%.+]] = type { [2 x i{{[0-9]+}}]*, i{{[0-9]+}}*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]* }
-// CHECK: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8*
template <typename T>
T tmain() {
S<T> test;
// LAMBDA: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* %{{.+}}, i32* %{{.+}}, %{{.+}}* [[ARG:%.+]])
// LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
// LAMBDA: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]],
- // LAMBDA: call i32 @__kmpc_cancel_barrier(
g = 1;
// LAMBDA: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
// LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
// BLOCKS: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* %{{.+}}, i32* %{{.+}}, %{{.+}}* [[ARG:%.+]])
// BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
// BLOCKS: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]],
- // BLOCKS: call i32 @__kmpc_cancel_barrier(
g = 1;
// BLOCKS: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
// BLOCKS-NOT: [[G]]{{[[^:word:]]}}
// CHECK-NOT: [[T_VAR_PRIV]]
// CHECK-NOT: [[VEC_PRIV]]
// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
-// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_REF]]
-// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
-// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
// CHECK-DAG: call void [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
// CHECK-DAG: call void [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]*
// CHECK: ret void
// CHECK-NOT: [[T_VAR_PRIV]]
// CHECK-NOT: [[VEC_PRIV]]
// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]])
-// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_REF]]
-// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
-// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
// CHECK-DAG: call void [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]])
// CHECK-DAG: call void [[S_INT_TY_DESTR]]([[S_INT_TY]]*
// CHECK: ret void
// LAMBDA: [[G_REF_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
// LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_REF_ADDR]]
// LAMBDA: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[G_PRIVATE_ADDR]]
- // LAMBDA: call i32 @__kmpc_cancel_barrier(
g = 1;
// LAMBDA: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
// LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
// BLOCKS: [[G_REF_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
// BLOCKS: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_REF_ADDR]]
// BLOCKS: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[G_PRIVATE_ADDR]]
- // BLOCKS: call i32 @__kmpc_cancel_barrier(
g = 1;
// BLOCKS: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
// BLOCKS-NOT: [[G]]{{[[^:word:]]}}
// For min reduction operation initial value of private variable is largest repesentable value.
// CHECK: store float 0x47EFFFFFE0000000, float* [[T_VAR1_PRIV]],
-// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
-// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
-// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
-
// Skip checks for internal operations.
// void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
// res = __kmpc_reduce_nowait(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>);
+// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
+// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
// CHECK: [[BITCAST:%.+]] = bitcast [4 x i8*]* [[RED_LIST]] to i8*
// CHECK: [[RES:%.+]] = call i32 @__kmpc_reduce_nowait(%{{.+}}* [[REDUCTION_LOC]], i32 [[GTID]], i32 4, i64 32, i8* [[BITCAST]], void (i8*, i8*)* [[REDUCTION_FUNC:@.+]], [8 x i32]* [[REDUCTION_LOCK]])
// For min reduction operation initial value of private variable is largest repesentable value.
// CHECK: store i{{[0-9]+}} 2147483647, i{{[0-9]+}}* [[T_VAR1_PRIV]],
-// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
-// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
-// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
-
// Skip checks for internal operations.
// void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
// res = __kmpc_reduce_nowait(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>);
+// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
+// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
// CHECK: [[BITCAST:%.+]] = bitcast [4 x i8*]* [[RED_LIST]] to i8*
// CHECK: [[RES:%.+]] = call i32 @__kmpc_reduce_nowait(%{{.+}}* [[REDUCTION_LOC]], i32 [[GTID]], i32 4, i64 32, i8* [[BITCAST]], void (i8*, i8*)* [[REDUCTION_FUNC:@.+]], [8 x i32]* [[REDUCTION_LOCK]])