From: Alexey Bataev Date: Mon, 15 Feb 2016 08:07:17 +0000 (+0000) Subject: [OPENMP] Remove extra sync barriers for 'firstprivate' clause. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=60ea58923cedab42181297ba150cab731449fc67;p=clang [OPENMP] Remove extra sync barriers for 'firstprivate' clause. Sync barrier will be emitted after generation of firstprivate variables only if one of the firstprivate vars is used in lastprivate clause. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@260877 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index b378cc6dea..1a9ff6eacd 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -376,14 +376,23 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) { if (!HaveInsertPoint()) return false; + bool FirstprivateIsLastprivate = false; + llvm::DenseSet Lastprivates; + for (const auto *C : D.getClausesOfKind()) { + for (const auto *D : C->varlists()) + Lastprivates.insert( + cast(cast(D)->getDecl())->getCanonicalDecl()); + } llvm::DenseSet EmittedAsFirstprivate; for (const auto *C : D.getClausesOfKind()) { auto IRef = C->varlist_begin(); auto InitsRef = C->inits().begin(); for (auto IInit : C->private_copies()) { auto *OrigVD = cast(cast(*IRef)->getDecl()); - if (EmittedAsFirstprivate.count(OrigVD) == 0) { - EmittedAsFirstprivate.insert(OrigVD); + FirstprivateIsLastprivate = + FirstprivateIsLastprivate || + (Lastprivates.count(OrigVD->getCanonicalDecl()) > 0); + if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) { auto *VD = cast(cast(IInit)->getDecl()); auto *VDInit = cast(cast(*InitsRef)->getDecl()); bool IsRegistered; @@ -443,7 +452,7 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, ++IRef, ++InitsRef; } } - return !EmittedAsFirstprivate.empty(); + return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty(); } void CodeGenFunction::EmitOMPPrivateClause( @@ -490,7 +499,6 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { auto *VD = cast(cast(*IRef)->getDecl()); QualType Type = VD->getType(); if (CopiedVars.insert(VD->getCanonicalDecl()).second) { - // Get the address of the master variable. If we are emitting code with // TLS support, the address is passed from the master as field in the // captured declaration. @@ -964,12 +972,11 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { auto &&CodeGen = [&S](CodeGenFunction &CGF) { OMPPrivateScope PrivateScope(CGF); bool Copyins = CGF.EmitOMPCopyinClause(S); - bool Firstprivates = CGF.EmitOMPFirstprivateClause(S, PrivateScope); - if (Copyins || Firstprivates) { + (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); + if (Copyins) { // Emit implicit barrier to synchronize threads and avoid data races on - // initialization of firstprivate variables or propagation master's thread - // values of threadprivate variables to local instances of that variables - // of all other implicit threads. + // propagation master's thread values of threadprivate variables to local + // instances of that variables of all other implicit threads. CGF.CGM.getOpenMPRuntime().emitBarrierCall( CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, /*ForceSimpleCall=*/true); @@ -1605,7 +1612,8 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { OMPPrivateScope LoopScope(*this); if (EmitOMPFirstprivateClause(S, LoopScope)) { // Emit implicit barrier to synchronize threads and avoid data races on - // initialization of firstprivate variables. + // initialization of firstprivate variables and post-update of + // lastprivate variables. CGM.getOpenMPRuntime().emitBarrierCall( *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, /*ForceSimpleCall=*/true); @@ -1804,7 +1812,8 @@ CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { CodeGenFunction::OMPPrivateScope LoopScope(CGF); if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { // Emit implicit barrier to synchronize threads and avoid data races on - // initialization of firstprivate variables. + // initialization of firstprivate variables and post-update of lastprivate + // variables. CGF.CGM.getOpenMPRuntime().emitBarrierCall( CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, /*ForceSimpleCall=*/true); @@ -1883,8 +1892,7 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { llvm::SmallVector SrcExprs; llvm::SmallVector AssignmentOps; // Check if there are any 'copyprivate' clauses associated with this - // 'single' - // construct. + // 'single' construct. // Build a list of copyprivate variables along with helper expressions // (, , = expressions) for (const auto *C : S.getClausesOfKind()) { @@ -1897,10 +1905,9 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { } LexicalScope Scope(*this, S.getSourceRange()); // Emit code for 'single' region along with 'copyprivate' clauses - bool HasFirstprivates; - auto &&CodeGen = [&S, &HasFirstprivates](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF) { CodeGenFunction::OMPPrivateScope SingleScope(CGF); - HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope); + (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); CGF.EmitOMPPrivateClause(S, SingleScope); (void)SingleScope.Privatize(); @@ -1909,10 +1916,9 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), CopyprivateVars, DestExprs, SrcExprs, AssignmentOps); - // Emit an implicit barrier at the end (to avoid data race on firstprivate - // init or if no 'nowait' clause was specified and no 'copyprivate' clause). - if ((!S.getSingleClause() || HasFirstprivates) && - CopyprivateVars.empty()) { + // Emit an implicit barrier at the end (to avoid data race if no 'nowait' + // clause was specified and no 'copyprivate' clause). + if (!S.getSingleClause() && CopyprivateVars.empty()) { CGM.getOpenMPRuntime().emitBarrierCall( *this, S.getLocStart(), S.getSingleClause() ? OMPD_unknown : OMPD_single); diff --git a/test/OpenMP/for_firstprivate_codegen.cpp b/test/OpenMP/for_firstprivate_codegen.cpp index 01a9355945..5b1a406941 100644 --- a/test/OpenMP/for_firstprivate_codegen.cpp +++ b/test/OpenMP/for_firstprivate_codegen.cpp @@ -95,7 +95,7 @@ int main() { // LAMBDA: [[SIVAR2_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR2_PRIVATE_ADDR_REF]] // LAMBDA: store i{{[0-9]+}} [[SIVAR2_VAL]], i{{[0-9]+}}* [[SIVAR2_PRIVATE_ADDR]] - // LAMBDA: call void @__kmpc_barrier( + // LAMBDA-NOT: call void @__kmpc_barrier( g = 1; g1 = 1; sivar = 2; @@ -158,7 +158,7 @@ int main() { // BLOCKS: [[SIVAR2_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF_ADDRR]] // BLOCKS: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR2_PRIVATE_ADDR]] - // BLOCKS: call void @__kmpc_barrier( + // BLOCKS-NOT: call void @__kmpc_barrier( g = 1; g1 =1; sivar = 2; @@ -246,7 +246,7 @@ int main() { // CHECK: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIV]] // Synchronization for initialization. -// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]]) +// CHECK-NOT: call void @__kmpc_barrier( // CHECK: call void @__kmpc_for_static_init_4( // CHECK: call void @__kmpc_for_static_fini( @@ -310,10 +310,8 @@ int main() { // CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]]) // CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]]) -// Synchronization for initialization. -// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]] -// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]] -// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]]) +// No synchronization for initialization. +// CHECK-NOT: call void @__kmpc_barrier( // CHECK: call void @__kmpc_for_static_init_4( // CHECK: call void @__kmpc_for_static_fini( diff --git a/test/OpenMP/parallel_firstprivate_codegen.cpp b/test/OpenMP/parallel_firstprivate_codegen.cpp index d0da8cea87..cd50a0cb0e 100644 --- a/test/OpenMP/parallel_firstprivate_codegen.cpp +++ b/test/OpenMP/parallel_firstprivate_codegen.cpp @@ -31,7 +31,6 @@ struct S { // CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float } // CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } // CHECK-DAG: [[ST_TY:%.+]] = type { i{{[0-9]+}}, i{{[0-9]+}} } -// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* template T tmain() { @@ -70,7 +69,7 @@ int main() { // LAMBDA: store i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G_PRIVATE_ADDR]], align 128 // LAMBDA: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF]] // LAMBDA: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIVATE_ADDR]] - // LAMBDA: call {{.*}}void @__kmpc_barrier( + // LAMBDA-NOT: call {{.*}}void @__kmpc_barrier( g = 1; sivar = 2; // LAMBDA: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], @@ -115,7 +114,7 @@ int main() { // BLOCK: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_REF_ADDR]] // BLOCKS: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF]], // BLOCKS: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIVATE_ADDR]], - // BLOCKS: call {{.*}}void @__kmpc_barrier( + // BLOCKS-NOT: call {{.*}}void @__kmpc_barrier( g = 1; sivar = 2; // BLOCKS: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], @@ -244,9 +243,7 @@ int main() { // CHECK: call {{.*}} [[ST_TY_DEFAULT_CONSTR]]([[ST_TY]]* [[ST_TY_TEMP:%.+]]) // CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]]) // CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]]) -// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]] -// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]] -// CHECK: call {{.*}}void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]]) +// CHECK-NOT: call {{.*}}void @__kmpc_barrier( // CHECK-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]]) // CHECK-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]* // CHECK: ret void diff --git a/test/OpenMP/sections_firstprivate_codegen.cpp b/test/OpenMP/sections_firstprivate_codegen.cpp index 0e9273f52c..dd854b57b1 100644 --- a/test/OpenMP/sections_firstprivate_codegen.cpp +++ b/test/OpenMP/sections_firstprivate_codegen.cpp @@ -59,7 +59,6 @@ S s_arr[] = {1, 2}; // CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, S var(3); // CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, -// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* // CHECK-DAG: [[SECTIONS_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8* // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) @@ -94,7 +93,7 @@ int main() { // LAMBDA: [[SIVAR1_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR1_REF]] // LAMBDA: store i{{[0-9]+}} [[SIVAR1_VAL]], i{{[0-9]+}}* [[SIVAR1_PRIVATE_ADDR]] - // LAMBDA: call void @__kmpc_barrier( + // LAMBDA-NOT: call void @__kmpc_barrier( { g = 1; sivar = 10; @@ -154,7 +153,7 @@ int main() { // BLOCKS: [[SIVAR1_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR1_REF]], // BLOCKS: store i{{[0-9]+}} [[SIVAR1_VAL]], i{{[0-9]+}}* [[SIVAR1_PRIVATE_ADDR]], - // BLOCKS: call void @__kmpc_barrier( + // BLOCKS-NOT: call void @__kmpc_barrier( { g = 1; sivar = 10; @@ -242,7 +241,7 @@ int main() { // CHECK: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR]], // CHECK: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIV]], -// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]]) +// CHECK-NOT: call void @__kmpc_barrier( // CHECK: call void @__kmpc_for_static_init_4( // CHECK: call void @__kmpc_for_static_fini( @@ -304,10 +303,8 @@ int main() { // CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]]) // CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]]) -// Synchronization for initialization. -// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]] -// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]] -// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]]) +// No synchronization for initialization. +// CHECK-NOT: call void @__kmpc_barrier( // CHECK: call void @__kmpc_for_static_init_4( // CHECK: call void @__kmpc_for_static_fini( diff --git a/test/OpenMP/single_firstprivate_codegen.cpp b/test/OpenMP/single_firstprivate_codegen.cpp index cc72addb5f..8407f77243 100644 --- a/test/OpenMP/single_firstprivate_codegen.cpp +++ b/test/OpenMP/single_firstprivate_codegen.cpp @@ -57,7 +57,6 @@ int vec[] = {1, 2}; S s_arr[] = {1, 2}; // CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, S var(3); -// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* // CHECK-DAG: [[SINGLE_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 322, i32 0, i32 0, i8* // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) @@ -215,7 +214,7 @@ int main() { // CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]* // CHECK: call void @__kmpc_end_single( -// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]]) +// CHECK-NOT: call void @__kmpc_barrier( // CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]()