From: Alexey Bataev
Date: Fri, 9 Nov 2018 20:03:19 +0000 (+0000)
Subject: [OPENMP][NVPTX]Extend number of constructs executed in SPMD mode.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a0fa523c073426d74b700f6c0ab46d62c130a8b6;p=clang

[OPENMP][NVPTX]Extend number of constructs executed in SPMD mode.

If the statements between target|teams|distribute directives do not require
execution in the master thread, like constant expressions, null statements,
simple declarations, etc., such constructs can be executed in SPMD mode.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@346551 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index ab9631c1fe..8a98454892 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -698,12 +698,58 @@ getDataSharingMode(CodeGenModule &CGM) {
                                           : CGOpenMPRuntimeNVPTX::Generic;
 }
 
+// Checks if the expression is constant or does not have non-trivial function
+// calls.
+static bool isTrivial(ASTContext &Ctx, const Expr * E) {
+  // We can skip constant expressions.
+  // We can skip expressions with trivial calls or simple expressions.
+  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
+          !E->hasNonTrivialCall(Ctx)) &&
+         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
+}
+
 /// Checks if the \p Body is the \a CompoundStmt and returns its child statement
-/// iff there is only one.
-static const Stmt *getSingleCompoundChild(const Stmt *Body) {
-  if (const auto *C = dyn_cast<CompoundStmt>(Body))
-    if (C->size() == 1)
-      return C->body_front();
+/// iff there is only one that is not evaluatable at the compile time.
+static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body) {
+  if (const auto *C = dyn_cast<CompoundStmt>(Body)) {
+    const Stmt *Child = nullptr;
+    for (const Stmt *S : C->body()) {
+      if (const auto *E = dyn_cast<Expr>(S)) {
+        if (isTrivial(Ctx, E))
+          continue;
+      }
+      // Some of the statements can be ignored.
+      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
+          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
+        continue;
+      // Analyze declarations.
+      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
+        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
+              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
+                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
+                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
+                  isa<UsingDirectiveDecl>(D) ||
+                  isa<OMPDeclareReductionDecl>(D) ||
+                  isa<OMPThreadPrivateDecl>(D))
+                return true;
+              const auto *VD = dyn_cast<VarDecl>(D);
+              if (!VD)
+                return false;
+              return VD->isConstexpr() ||
+                     ((VD->getType().isTrivialType(Ctx) ||
+                       VD->getType()->isReferenceType()) &&
+                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
+            }))
+          continue;
+      }
+      // Found multiple children - cannot get the one child only.
+      if (Child)
+        return Body;
+      Child = S;
+    }
+    if (Child)
+      return Child;
+  }
   return Body;
 }
 
@@ -732,7 +778,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
   const auto *CS = D.getInnermostCapturedStmt();
   const auto *Body =
       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
-  const Stmt *ChildStmt = getSingleCompoundChild(Body);
+  const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body);
   if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
@@ -746,7 +792,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
                 /*IgnoreCaptured=*/true);
         if (!Body)
           return false;
-        ChildStmt = getSingleCompoundChild(Body);
+        ChildStmt = getSingleCompoundChild(Ctx, Body);
         if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
           DKind = NND->getDirectiveKind();
           if (isOpenMPParallelDirective(DKind) &&
@@ -905,7 +951,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
   const auto *CS = D.getInnermostCapturedStmt();
   const auto *Body =
       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
-  const Stmt *ChildStmt = getSingleCompoundChild(Body);
+  const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body);
   if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
@@ -920,7 +966,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
                 /*IgnoreCaptured=*/true);
         if (!Body)
           return false;
-        ChildStmt = getSingleCompoundChild(Body);
+        ChildStmt = getSingleCompoundChild(Ctx, Body);
         if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
           DKind = NND->getDirectiveKind();
           if (isOpenMPWorksharingDirective(DKind) &&
@@ -932,7 +978,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
                 /*IgnoreCaptured=*/true);
         if (!Body)
           return false;
-        ChildStmt = getSingleCompoundChild(Body);
+        ChildStmt = getSingleCompoundChild(Ctx, Body);
         if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
           DKind = NND->getDirectiveKind();
           if (isOpenMPParallelDirective(DKind) &&
@@ -944,7 +990,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
                 /*IgnoreCaptured=*/true);
         if (!Body)
           return false;
-        ChildStmt = getSingleCompoundChild(Body);
+        ChildStmt = getSingleCompoundChild(Ctx, Body);
         if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
           DKind = NND->getDirectiveKind();
           if (isOpenMPWorksharingDirective(DKind) &&
@@ -965,7 +1011,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
                 /*IgnoreCaptured=*/true);
         if (!Body)
           return false;
-        ChildStmt = getSingleCompoundChild(Body);
+        ChildStmt = getSingleCompoundChild(Ctx, Body);
         if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
           DKind = NND->getDirectiveKind();
           if (isOpenMPWorksharingDirective(DKind) &&
@@ -1287,10 +1333,6 @@ void CGOpenMPRuntimeNVPTX::emitSPMDKernel(const OMPExecutableDirective &D,
   IsInTTDRegion = false;
 }
 
-static void
-getDistributeLastprivateVars(const OMPExecutableDirective &D,
-                             llvm::SmallVectorImpl<const ValueDecl *> &Vars);
-
 void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader(
     CodeGenFunction &CGF, EntryFunctionState &EST,
     const OMPExecutableDirective &D) {
@@ -1303,33 +1345,10 @@ void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader(
   // Initialize the OMP state in the runtime; called by all active threads.
   bool RequiresFullRuntime = CGM.getLangOpts().OpenMPCUDAForceFullRuntime ||
                              !supportsLightweightRuntime(CGF.getContext(), D);
-  // Check if we have inner distribute + lastprivate|reduction clauses.
-  bool RequiresDatasharing = RequiresFullRuntime;
-  if (!RequiresDatasharing) {
-    const OMPExecutableDirective *TD = &D;
-    if (!isOpenMPTeamsDirective(TD->getDirectiveKind()) &&
-        !isOpenMPParallelDirective(TD->getDirectiveKind())) {
-      const Stmt *S = getSingleCompoundChild(
-          TD->getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(
-              /*IgnoreCaptured=*/true));
-      TD = cast<OMPExecutableDirective>(S);
-    }
-    if (!isOpenMPDistributeDirective(TD->getDirectiveKind()) &&
-        !isOpenMPParallelDirective(TD->getDirectiveKind())) {
-      const Stmt *S = getSingleCompoundChild(
-          TD->getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(
-              /*IgnoreCaptured=*/true));
-      TD = cast<OMPExecutableDirective>(S);
-    }
-    if (isOpenMPDistributeDirective(TD->getDirectiveKind()))
-      RequiresDatasharing = TD->hasClausesOfKind<OMPLastprivateClause>() ||
-                            TD->hasClausesOfKind<OMPReductionClause>();
-  }
-  llvm::Value *Args[] = {
-      getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true),
-      /*RequiresOMPRuntime=*/
-      Bld.getInt16(RequiresFullRuntime ? 1 : 0),
-      /*RequiresDataSharing=*/Bld.getInt16(RequiresDatasharing ? 1 : 0)};
+  llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true),
+                         /*RequiresOMPRuntime=*/
+                         Bld.getInt16(RequiresFullRuntime ? 1 : 0),
+                         /*RequiresDataSharing=*/Bld.getInt16(0)};
   CGF.EmitRuntimeCall(
       createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
 
@@ -1928,13 +1947,14 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
 /// Get list of lastprivate variables from the teams distribute ... or
 /// teams {distribute ...} directives.
 static void
-getDistributeLastprivateVars(const OMPExecutableDirective &D,
+getDistributeLastprivateVars(ASTContext &Ctx, const OMPExecutableDirective &D,
                              llvm::SmallVectorImpl<const ValueDecl *> &Vars) {
   assert(isOpenMPTeamsDirective(D.getDirectiveKind()) &&
          "expected teams directive.");
   const OMPExecutableDirective *Dir = &D;
   if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
     if (const Stmt *S = getSingleCompoundChild(
+            Ctx,
             D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(
                 /*IgnoreCaptured=*/true))) {
       Dir = dyn_cast<OMPExecutableDirective>(S);
@@ -1961,7 +1981,7 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
   llvm::SmallVector<const ValueDecl *, 4> LastPrivates;
   llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
   if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) {
-    getDistributeLastprivateVars(D, LastPrivates);
+    getDistributeLastprivateVars(CGM.getContext(), D, LastPrivates);
     if (!LastPrivates.empty())
       GlobalizedRD = ::buildRecordForGlobalizedVars(
           CGM.getContext(), llvm::None, LastPrivates, MappedDeclsFields);
diff --git a/test/OpenMP/nvptx_SPMD_codegen.cpp b/test/OpenMP/nvptx_SPMD_codegen.cpp
index 319b2c9d05..cf68ee7dd8 100644
--- a/test/OpenMP/nvptx_SPMD_codegen.cpp
+++ b/test/OpenMP/nvptx_SPMD_codegen.cpp
@@ -40,7 +40,7 @@ void foo() {
   for (int i = 0; i < 10; ++i)
     ;
 int a;
-// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 1)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
@@ -76,17 +76,28 @@ int a;
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
 #pragma omp target teams
+  {
+    int b;
 #pragma omp distribute parallel for simd
   for (int i = 0; i < 10; ++i)
     ;
+    ;
+  }
 #pragma omp target teams
+  {
+    int b[] = {2, 3, sizeof(int)};
 #pragma omp distribute parallel for simd schedule(static)
   for (int i = 0; i < 10; ++i)
     ;
+  }
 #pragma omp target teams
+  {
+    int b;
 #pragma omp distribute parallel for simd schedule(static, 1)
   for (int i = 0; i < 10; ++i)
     ;
+    int &c = b;
+  }
 #pragma omp target teams
 #pragma omp distribute parallel for simd schedule(auto)
   for (int i = 0; i < 10; ++i)
     ;
diff --git a/test/OpenMP/nvptx_target_parallel_codegen.cpp b/test/OpenMP/nvptx_target_parallel_codegen.cpp
index 6fccfbed56..bbde7bcff9 100644
--- a/test/OpenMP/nvptx_target_parallel_codegen.cpp
+++ b/test/OpenMP/nvptx_target_parallel_codegen.cpp
@@ -59,7 +59,7 @@ int bar(int n){
   // CHECK: store i16* {{%.+}}, i16** [[AA_ADDR]], align
   // CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align
   // CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
-  // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1, i16 1)
+  // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1, i16 0)
   // CHECK: call void @__kmpc_data_sharing_init_stack_spmd
   // CHECK: br label {{%?}}[[EXEC:.+]]
   //
@@ -102,7 +102,7 @@ int bar(int n){
   // CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align
   // CHECK: [[B:%.+]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align
   // CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
-  // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1, i16 1)
+  // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1, i16 0)
   // CHECK: call void @__kmpc_data_sharing_init_stack_spmd
   // CHECK: br label {{%?}}[[EXEC:.+]]
   //
diff --git a/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp b/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
index 163679d92b..905487fed7 100644
--- a/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
+++ b/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
@@ -47,7 +47,7 @@ int bar(int n){
 }
 
  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l22}}(
- // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
+ // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0)
  // CHECK: call void @__kmpc_data_sharing_init_stack_spmd
  // CHECK: br label {{%?}}[[EXEC:.+]]
  //
@@ -69,7 +69,7 @@ int bar(int n){
 
  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l26}}(
- // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
+ // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0)
  // CHECK: call void @__kmpc_data_sharing_init_stack_spmd
  // CHECK: br label {{%?}}[[EXEC:.+]]
  //
@@ -90,7 +90,7 @@ int bar(int n){
 
  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l31}}(
- // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
+ // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0)
  // CHECK: call void @__kmpc_data_sharing_init_stack_spmd
  // CHECK: br label {{%?}}[[EXEC:.+]]
  //
diff --git a/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp b/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
index a3790f2851..0073db6f28 100644
--- a/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
+++ b/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
@@ -54,7 +54,7 @@ int bar(int n){
 
   // CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l27}}(
   //
-  // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
+  // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0)
   // CHECK: call void @__kmpc_data_sharing_init_stack_spmd
   // CHECK: br label {{%?}}[[EXECUTE:.+]]
   //
@@ -242,7 +242,7 @@ int bar(int n){
 
   // CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l32}}(
   //
-  // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
+  // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0)
   // CHECK: call void @__kmpc_data_sharing_init_stack_spmd
   // CHECK: br label {{%?}}[[EXECUTE:.+]]
   //
@@ -520,7 +520,7 @@ int bar(int n){
 
   // CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l38}}(
   //
-  // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
+  // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0)
   // CHECK: call void @__kmpc_data_sharing_init_stack_spmd
   // CHECK: br label {{%?}}[[EXECUTE:.+]]
   //
diff --git a/test/OpenMP/nvptx_target_teams_codegen.cpp b/test/OpenMP/nvptx_target_teams_codegen.cpp
index 5d3088d019..c62d254524 100644
--- a/test/OpenMP/nvptx_target_teams_codegen.cpp
+++ b/test/OpenMP/nvptx_target_teams_codegen.cpp
@@ -227,7 +227,7 @@ int bar(int n){
 // CHECK: ret void
 
 // CHECK: define weak void @__omp_offloading_{{.*}}ftemplate{{.*}}_l37(
-// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0)
 // CHECK: call void @__kmpc_data_sharing_init_stack_spmd
 // CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack(
 // CHECK-NOT: call void @__kmpc_serialized_parallel(
diff --git a/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
index 0f95d5ad4e..8b8e0b0bba 100644
--- a/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
+++ b/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
@@ -75,7 +75,7 @@ int bar(int n){
 
 // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l32(
 // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
-// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 1)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
 // CHECK: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} 4, i16 1, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
 // CHECK: [[TEAM_ALLOC:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
 // CHECK: [[BC:%.+]] = bitcast i8* [[TEAM_ALLOC]] to [[REC:%.+]]*
diff --git a/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
index 3b56bfd9a7..395c10e663 100644
--- a/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
+++ b/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
@@ -70,7 +70,7 @@ int bar(int n){
 
 // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l30(
 // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
-// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 1)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
 // CHECK: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} 4, i16 1, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
 // CHECK: [[TEAM_ALLOC:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
 // CHECK: [[BC:%.+]] = bitcast i8* [[TEAM_ALLOC]] to [[REC:%.+]]*
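
Illustrative note (not part of the patch): a minimal sketch of the kind of region this change keeps in SPMD mode. The function name spmd_example and the loop bounds are arbitrary; the point is that the trivial statements around the nested directive are skipped by the extended getSingleCompoundChild(), so the nested directive is still found and the kernel is initialized with __kmpc_spmd_kernel_init(..., /*RequiresDataSharing=*/0).

  void spmd_example() {
  #pragma omp target teams
    {
      int b = 0;   // trivial local declaration: skipped by the new check
      ;            // null statement: skipped by the new check
  #pragma omp distribute parallel for simd
      for (int i = 0; i < 10; ++i)
        ;
    }
  }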