From: Alexey Bataev Date: Mon, 1 Oct 2018 16:20:57 +0000 (+0000) Subject: [OPENMP][NVPTX] Handle `requires datasharing` flag correctly with X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=faaac5c529209827dbf0f0f47790078deadbf4f6;p=clang [OPENMP][NVPTX] Handle `requires datasharing` flag correctly with lightweight runtime. The datasharing flag must be set to `1` when executing an SPMD-mode compatible directive with reduction|lastprivate clauses. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@343492 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 833b145f04..96cf6c1173 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -1207,6 +1207,10 @@ void CGOpenMPRuntimeNVPTX::emitSPMDKernel(const OMPExecutableDirective &D, IsOffloadEntry, CodeGen); } +static void +getDistributeLastprivateVars(const OMPExecutableDirective &D, + llvm::SmallVectorImpl<const ValueDecl *> &Vars); + void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader( CodeGenFunction &CGF, EntryFunctionState &EST, const OMPExecutableDirective &D) { @@ -1219,11 +1223,33 @@ void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader( // Initialize the OMP state in the runtime; called by all active threads. bool RequiresFullRuntime = CGM.getLangOpts().OpenMPCUDAForceFullRuntime || !supportsLightweightRuntime(CGF.getContext(), D); + // Check if we have inner distribute + lastprivate|reduction clauses. 
+ bool RequiresDatasharing = RequiresFullRuntime; + if (!RequiresDatasharing) { + const OMPExecutableDirective *TD = &D; + if (!isOpenMPTeamsDirective(TD->getDirectiveKind()) && + !isOpenMPParallelDirective(TD->getDirectiveKind())) { + const Stmt *S = getSingleCompoundChild( + TD->getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers( + /*IgnoreCaptured=*/true)); + TD = cast<OMPExecutableDirective>(S); + } + if (!isOpenMPDistributeDirective(TD->getDirectiveKind()) && + !isOpenMPParallelDirective(TD->getDirectiveKind())) { + const Stmt *S = getSingleCompoundChild( + TD->getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers( + /*IgnoreCaptured=*/true)); + TD = cast<OMPExecutableDirective>(S); + } + if (isOpenMPDistributeDirective(TD->getDirectiveKind())) + RequiresDatasharing = TD->hasClausesOfKind<OMPLastprivateClause>() || + TD->hasClausesOfKind<OMPReductionClause>(); + } llvm::Value *Args[] = { getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true), /*RequiresOMPRuntime=*/ Bld.getInt16(RequiresFullRuntime ? 1 : 0), - /*RequiresDataSharing=*/Bld.getInt16(RequiresFullRuntime ? 1 : 0)}; + /*RequiresDataSharing=*/Bld.getInt16(RequiresDatasharing ? 
1 : 0)}; CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args); diff --git a/test/OpenMP/nvptx_SPMD_codegen.cpp b/test/OpenMP/nvptx_SPMD_codegen.cpp index c247cc3459..319b2c9d05 100644 --- a/test/OpenMP/nvptx_SPMD_codegen.cpp +++ b/test/OpenMP/nvptx_SPMD_codegen.cpp @@ -40,7 +40,7 @@ void foo() { for (int i = 0; i < 10; ++i) ; int a; -// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 1) // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) diff --git a/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp index 6539b0e3c9..01c753355c 100644 --- a/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp +++ b/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp @@ -69,7 +69,7 @@ int bar(int n){ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l32( // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 1) // CHECK: [[TEAM_ALLOC:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 4, i16 0) // CHECK: [[BC:%.+]] = bitcast i8* [[TEAM_ALLOC]] to [[REC:%.+]]* // CHECK: getelementptr inbounds [[REC]], [[REC]]* [[BC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 diff --git a/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp index 5f7c071140..1555073cd0 100644 --- a/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp +++ 
b/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp @@ -64,7 +64,7 @@ int bar(int n){ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l30( // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 1) // CHECK: [[TEAM_ALLOC:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 4, i16 0) // CHECK: [[BC:%.+]] = bitcast i8* [[TEAM_ALLOC]] to [[REC:%.+]]* // CHECK: getelementptr inbounds [[REC]], [[REC]]* [[BC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0