IsOffloadEntry, CodeGen);
}
+static void
+getDistributeLastprivateVars(const OMPExecutableDirective &D,
+ llvm::SmallVectorImpl<const ValueDecl *> &Vars);
+
void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader(
CodeGenFunction &CGF, EntryFunctionState &EST,
const OMPExecutableDirective &D) {
// Initialize the OMP state in the runtime; called by all active threads.
bool RequiresFullRuntime = CGM.getLangOpts().OpenMPCUDAForceFullRuntime ||
!supportsLightweightRuntime(CGF.getContext(), D);
+ // Data sharing is also required when the inner distribute directive has
+ // lastprivate or reduction clauses, even if the full runtime is not.
+ bool RequiresDatasharing = RequiresFullRuntime;
+ if (!RequiresDatasharing) {
+ const OMPExecutableDirective *TD = &D;
+ if (!isOpenMPTeamsDirective(TD->getDirectiveKind()) &&
+ !isOpenMPParallelDirective(TD->getDirectiveKind())) {
+ const Stmt *S = getSingleCompoundChild(
+ TD->getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(
+ /*IgnoreCaptured=*/true));
+ TD = cast<OMPExecutableDirective>(S);
+ }
+ if (!isOpenMPDistributeDirective(TD->getDirectiveKind()) &&
+ !isOpenMPParallelDirective(TD->getDirectiveKind())) {
+ const Stmt *S = getSingleCompoundChild(
+ TD->getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(
+ /*IgnoreCaptured=*/true));
+ TD = cast<OMPExecutableDirective>(S);
+ }
+ if (isOpenMPDistributeDirective(TD->getDirectiveKind()))
+ RequiresDatasharing = TD->hasClausesOfKind<OMPLastprivateClause>() ||
+ TD->hasClausesOfKind<OMPReductionClause>();
+ }
llvm::Value *Args[] = {
getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true),
/*RequiresOMPRuntime=*/
Bld.getInt16(RequiresFullRuntime ? 1 : 0),
- /*RequiresDataSharing=*/Bld.getInt16(RequiresFullRuntime ? 1 : 0)};
+ /*RequiresDataSharing=*/Bld.getInt16(RequiresDatasharing ? 1 : 0)};
CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
for (int i = 0; i < 10; ++i)
;
int a;
-// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 1)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l32(
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
-// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 1)
// CHECK: [[TEAM_ALLOC:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 4, i16 0)
// CHECK: [[BC:%.+]] = bitcast i8* [[TEAM_ALLOC]] to [[REC:%.+]]*
// CHECK: getelementptr inbounds [[REC]], [[REC]]* [[BC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l30(
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
-// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 1)
// CHECK: [[TEAM_ALLOC:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 4, i16 0)
// CHECK: [[BC:%.+]] = bitcast i8* [[TEAM_ALLOC]] to [[REC:%.+]]*
// CHECK: getelementptr inbounds [[REC]], [[REC]]* [[BC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0