NB_Parallel = 1,
};
+typedef std::pair<CharUnits /*Align*/, const ValueDecl *> VarsDataTy;
+// Order globalized variables by descending alignment so the record built
+// below packs them with minimal padding; stable_sort keeps the declaration
+// order of equally aligned variables.
+static bool stable_sort_comparator(const VarsDataTy P1, const VarsDataTy P2) {
+ return P1.first > P2.first;
+}
+
+static RecordDecl *buildRecordForGlobalizedVars(
+ ASTContext &C, ArrayRef<const ValueDecl *> EscapedDecls,
+ llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
+ &MappedDeclsFields) {
+ if (EscapedDecls.empty())
+ return nullptr;
+ SmallVector<VarsDataTy, 4> GlobalizedVars;
+ for (const ValueDecl *D : EscapedDecls)
+ GlobalizedVars.emplace_back(C.getDeclAlign(D), D);
+ std::stable_sort(GlobalizedVars.begin(), GlobalizedVars.end(),
+ stable_sort_comparator);
+ // Build struct _globalized_locals_ty {
+ // /* globalized vars */
+ // };
+ RecordDecl *GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty");
+ GlobalizedRD->startDefinition();
+ for (const auto &Pair : GlobalizedVars) {
+ const ValueDecl *VD = Pair.second;
+ QualType Type = VD->getType();
+ if (Type->isLValueReferenceType())
+ Type = C.getPointerType(Type.getNonReferenceType());
+ else
+ Type = Type.getNonReferenceType();
+ SourceLocation Loc = VD->getLocation();
+ auto *Field =
+ FieldDecl::Create(C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,
+ C.getTrivialTypeSourceInfo(Type, SourceLocation()),
+ /*BW=*/nullptr, /*Mutable=*/false,
+ /*InitStyle=*/ICIS_NoInit);
+ Field->setAccess(AS_public);
+ GlobalizedRD->addDecl(Field);
+ if (VD->hasAttrs()) {
+ for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
+ E(VD->getAttrs().end());
+ I != E; ++I)
+ Field->addAttr(*I);
+ }
+ MappedDeclsFields.try_emplace(VD, Field);
+ }
+ GlobalizedRD->completeDefinition();
+ return GlobalizedRD;
+}
+
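For reference, a minimal standalone sketch (plain C++, not Clang code; the variable names are invented) of the alignment-descending ordering the helper applies before laying out fields, which keeps padding in the globalized record to a minimum:

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>
    #include <utility>
    #include <vector>

    int main() {
      // (alignment, name) pairs standing in for (CharUnits, const ValueDecl *).
      std::vector<std::pair<std::size_t, const char *>> Vars = {
          {alignof(char), "c"}, {alignof(double), "d"}, {alignof(int), "i"}};
      // Descending alignment; stable_sort preserves declaration order on ties.
      std::stable_sort(Vars.begin(), Vars.end(),
                       [](const auto &P1, const auto &P2) {
                         return P1.first > P2.first;
                       });
      for (const auto &V : Vars)
        std::printf("%s (align %zu)\n", V.second, V.first);
      // Prints d, i, c: the field order of the resulting
      // struct _globalized_locals_ty { double d; int i; char c; };
    }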
/// Get the list of variables that can escape their declaration context.
class CheckVarsEscapingDeclContext final
: public ConstStmtVisitor<CheckVarsEscapingDeclContext> {
}
}
- typedef std::pair<CharUnits /*Align*/, const ValueDecl *> VarsDataTy;
- static bool stable_sort_comparator(const VarsDataTy P1, const VarsDataTy P2) {
- return P1.first > P2.first;
- }
-
void buildRecordForGlobalizedVars() {
assert(!GlobalizedRD &&
"Record for globalized variables is built already.");
- if (EscapedDecls.empty())
- return;
- ASTContext &C = CGF.getContext();
- SmallVector<VarsDataTy, 4> GlobalizedVars;
- for (const ValueDecl *D : EscapedDecls)
- GlobalizedVars.emplace_back(C.getDeclAlign(D), D);
- std::stable_sort(GlobalizedVars.begin(), GlobalizedVars.end(),
- stable_sort_comparator);
- // Build struct _globalized_locals_ty {
- // /* globalized vars */
- // };
- GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty");
- GlobalizedRD->startDefinition();
- for (const auto &Pair : GlobalizedVars) {
- const ValueDecl *VD = Pair.second;
- QualType Type = VD->getType();
- if (Type->isLValueReferenceType())
- Type = C.getPointerType(Type.getNonReferenceType());
- else
- Type = Type.getNonReferenceType();
- SourceLocation Loc = VD->getLocation();
- auto *Field = FieldDecl::Create(
- C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,
- C.getTrivialTypeSourceInfo(Type, SourceLocation()),
- /*BW=*/nullptr, /*Mutable=*/false,
- /*InitStyle=*/ICIS_NoInit);
- Field->setAccess(AS_public);
- GlobalizedRD->addDecl(Field);
- if (VD->hasAttrs()) {
- for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
- E(VD->getAttrs().end());
- I != E; ++I)
- Field->addAttr(*I);
- }
- MappedDeclsFields.try_emplace(VD, Field);
- }
- GlobalizedRD->completeDefinition();
+ GlobalizedRD = ::buildRecordForGlobalizedVars(
+ CGF.getContext(), EscapedDecls.getArrayRef(), MappedDeclsFields);
}
public:
return false;
}
-/// Checks if the directive is the distribute clause with the lastprivate
-/// clauses. This construct does not support SPMD execution mode.
-static bool hasDistributeWithLastprivateClauses(const OMPExecutableDirective &D) {
- return isOpenMPDistributeDirective(D.getDirectiveKind()) &&
- D.hasClausesOfKind<OMPLastprivateClause>();
-}
-
/// Check for inner (nested) SPMD construct, if any
static bool hasNestedSPMDDirective(ASTContext &Ctx,
const OMPExecutableDirective &D) {
switch (D.getDirectiveKind()) {
case OMPD_target:
if (isOpenMPParallelDirective(DKind) &&
- !hasParallelIfNumThreadsClause(Ctx, *NestedDir) &&
- !hasDistributeWithLastprivateClauses(*NestedDir))
+ !hasParallelIfNumThreadsClause(Ctx, *NestedDir))
return true;
if (DKind == OMPD_teams) {
Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
if (isOpenMPParallelDirective(DKind) &&
- !hasParallelIfNumThreadsClause(Ctx, *NND) &&
- !hasDistributeWithLastprivateClauses(*NND))
+ !hasParallelIfNumThreadsClause(Ctx, *NND))
return true;
}
}
return false;
case OMPD_target_teams:
return isOpenMPParallelDirective(DKind) &&
- !hasParallelIfNumThreadsClause(Ctx, *NestedDir) &&
- !hasDistributeWithLastprivateClauses(*NestedDir);
+ !hasParallelIfNumThreadsClause(Ctx, *NestedDir);
case OMPD_target_simd:
case OMPD_target_parallel:
case OMPD_target_parallel_for:
case OMPD_target_teams_distribute_parallel_for:
case OMPD_target_teams_distribute_parallel_for_simd:
-// Distribute with lastprivates requires non-SPMD execution mode.
- return !hasParallelIfNumThreadsClause(Ctx, D) &&
- !hasDistributeWithLastprivateClauses(D);
+ return !hasParallelIfNumThreadsClause(Ctx, D);
case OMPD_target_simd:
case OMPD_target_teams_distribute:
case OMPD_target_teams_distribute_simd:
return OutlinedFun;
}
+/// Get the list of lastprivate variables from a teams distribute ... or
+/// teams {distribute ...} directive.
+static void
+getDistributeLastprivateVars(const OMPExecutableDirective &D,
+ llvm::SmallVectorImpl<const ValueDecl *> &Vars) {
+ assert(isOpenMPTeamsDirective(D.getDirectiveKind()) &&
+ "expected teams directive.");
+ const OMPExecutableDirective *Dir = &D;
+ if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
+ if (const Stmt *S = getSingleCompoundChild(
+ D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(
+ /*IgnoreCaptured=*/true))) {
+ Dir = dyn_cast<OMPExecutableDirective>(S);
+ if (Dir && !isOpenMPDistributeDirective(Dir->getDirectiveKind()))
+ Dir = nullptr;
+ }
+ }
+ if (!Dir)
+ return;
+ for (const OMPLastprivateClause *C :
+ Dir->getClausesOfKind<OMPLastprivateClause>()) {
+ for (const Expr *E : C->getVarRefs()) {
+ const auto *DE = cast<DeclRefExpr>(E->IgnoreParens());
+ Vars.push_back(cast<ValueDecl>(DE->getDecl()->getCanonicalDecl()));
+ }
+ }
+}
+
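For illustration, a hypothetical input this helper matches: a teams directive whose single child is a distribute directive carrying a lastprivate clause (the combined teams distribute forms are caught by the isOpenMPDistributeDirective check on D itself). Here it would collect 'l':

    void sketch(int n, int *a) {
      int l = 0;
    #pragma omp target teams
    #pragma omp distribute lastprivate(l)
      for (int i = 0; i < n; ++i)
        l = a[i]; // the last iteration's value is copied back to 'l'
    }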
llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
SourceLocation Loc = D.getBeginLoc();
+ const RecordDecl *GlobalizedRD = nullptr;
+ llvm::SmallVector<const ValueDecl *, 4> LastPrivates;
+ llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
+ if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) {
+ getDistributeLastprivateVars(D, LastPrivates);
+ if (!LastPrivates.empty())
+ GlobalizedRD = buildRecordForGlobalizedVars(
+ CGM.getContext(), LastPrivates, MappedDeclsFields);
+ }
+
// Emit target region as a standalone region.
class NVPTXPrePostActionTy : public PrePostActionTy {
SourceLocation &Loc;
+ const RecordDecl *GlobalizedRD;
+ llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
+ &MappedDeclsFields;
public:
- NVPTXPrePostActionTy(SourceLocation &Loc) : Loc(Loc) {}
+ NVPTXPrePostActionTy(
+ SourceLocation &Loc, const RecordDecl *GlobalizedRD,
+ llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
+ &MappedDeclsFields)
+ : Loc(Loc), GlobalizedRD(GlobalizedRD),
+ MappedDeclsFields(MappedDeclsFields) {}
void Enter(CodeGenFunction &CGF) override {
- static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
- .emitGenericVarsProlog(CGF, Loc);
+ auto &Rt =
+ static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime());
+ if (GlobalizedRD) {
+ auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;
+ I->getSecond().GlobalRecord = GlobalizedRD;
+ I->getSecond().MappedParams =
+ llvm::make_unique<CodeGenFunction::OMPMapVars>();
+ DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
+ for (const auto &Pair : MappedDeclsFields) {
+ assert(Pair.getFirst()->isCanonicalDecl() &&
+ "Expected canonical declaration");
+ Data.insert(std::make_pair(
+ Pair.getFirst(),
+ std::make_pair(Pair.getSecond(), Address::invalid())));
+ }
+ }
+ Rt.emitGenericVarsProlog(CGF, Loc);
}
void Exit(CodeGenFunction &CGF) override {
static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
.emitGenericVarsEpilog(CGF);
}
- } Action(Loc);
- if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD)
- CodeGen.setAction(Action);
+ } Action(Loc, GlobalizedRD, MappedDeclsFields);
+ CodeGen.setAction(Action);
llvm::Value *OutlinedFunVal = CGOpenMPRuntime::emitTeamsOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen);
llvm::Function *OutlinedFun = cast<llvm::Function>(OutlinedFunVal);
void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
SourceLocation Loc,
bool WithSPMDCheck) {
- if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic)
+ if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic &&
+ getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD)
return;
CGBuilderTy &Bld = CGF.Builder;
I->getSecond().GlobalRecordAddr = Phi;
I->getSecond().IsInSPMDModeFlag = IsSPMD;
} else {
- assert(getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_NonSPMD &&
- "Expected Non-SPMD construct.");
// TODO: allow the usage of shared memory to be controlled by
// the user, for now, default to global.
llvm::Value *GlobalRecordSizeArg[] = {
void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF,
bool WithSPMDCheck) {
- if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic)
+ if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic &&
+ getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD)
return;
const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr));
CGF.EmitBlock(ExitBB);
} else {
- assert(getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_NonSPMD &&
- "Expected Non-SPMD mode.");
CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
I->getSecond().GlobalRecordAddr);
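With these changes the prolog/epilog pair brackets an SPMD region the same way the generic path does: a push of the globalized record on entry, a pop on exit. A simplified C++ sketch of that pairing, using the libomptarget NVPTX deviceRTL entry points (the record and its field are hypothetical, sized to match the 4-byte push the test below checks for):

    #include <cstddef>
    #include <cstdint>

    extern "C" void *__kmpc_data_sharing_push_stack(std::size_t Size,
                                                    std::int16_t UseSharedMemory);
    extern "C" void __kmpc_data_sharing_pop_stack(void *Ptr);

    struct _globalized_locals_ty { int l; }; // one globalized lastprivate

    void spmd_region_sketch() {
      // Prolog: allocate the record in globalized (not shared) memory.
      void *Rec = __kmpc_data_sharing_push_stack(
          sizeof(_globalized_locals_ty), /*UseSharedMemory=*/0);
      auto *Locals = static_cast<_globalized_locals_ty *>(Rec);
      Locals->l = 0; // the loop body writes the lastprivate through this field
      // Epilog: release the record.
      __kmpc_data_sharing_pop_stack(Rec);
    }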
} else if (const auto *CD = dyn_cast<CapturedDecl>(D)) {
Body = CD->getBody();
NeedToDelayGlobalization = CGF.CapturedStmtInfo->getKind() == CR_OpenMP;
+ if (NeedToDelayGlobalization &&
+ getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
+ return;
}
if (!Body)
return;
#ifndef HEADER
#define HEADER
-// Check that the execution mode of the target region with lastprivates on the gpu is set to Non-SPMD Mode.
-// CHECK-DAG: {{@__omp_offloading_.+l33}}_exec_mode = weak constant i8 1
-// Check that the execution mode of all 4 target regions on the gpu is set to SPMD Mode.
-// CHECK-DAG: {{@__omp_offloading_.+l39}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l44}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l49}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l57}}_exec_mode = weak constant i8 0
+// Check that the execution mode of all 5 target regions on the gpu is set to SPMD Mode (exec_mode i8 0; Non-SPMD would be i8 1).
+// CHECK-DAG: {{@__omp_offloading_.+l32}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l38}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l43}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l48}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l56}}_exec_mode = weak constant i8 0
#define N 1000
#define M 10
return a;
}
-// CHECK_LABEL: define internal void @__omp_offloading_{{.+}}_l33_worker()
-
-// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l33(
-// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1)
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l32(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
+// CHECK: [[TEAM_ALLOC:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 4, i16 0)
+// CHECK: [[BC:%.+]] = bitcast i8* [[TEAM_ALLOC]] to [[REC:%.+]]*
+// CHECK: getelementptr inbounds [[REC]], [[REC]]* [[BC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
-// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[OUTL1:@__omp_outlined.*]]_wrapper to i8*), i16 1)
+// CHECK: {{call|invoke}} void [[OUTL1:@.+]](
// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_kernel_deinit(i16 1)
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
// CHECK: ret void
// CHECK: define internal void [[OUTL1]](
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: ret void
-// CHECK: define weak void @__omp_offloading_{{.*}}_l57(i[[SZ:64|32]] %{{[^,]+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{[^)]+}})
+// CHECK: define weak void @__omp_offloading_{{.*}}_l56(i[[SZ:64|32]] %{{[^,]+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{[^)]+}})
// CHECK: call void [[OUTLINED:@__omp_outlined.*]](i32* %{{.+}}, i32* %{{.+}}, i[[SZ]] %{{.*}}, i[[SZ]] %{{.*}}, i[[SZ]] %{{.*}}, [1000 x i32]* %{{.*}}, i32* %{{.*}})
// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{.*}})