: public ConstStmtVisitor<CheckVarsEscapingDeclContext> {
CodeGenFunction &CGF;
llvm::SetVector<const ValueDecl *> EscapedDecls;
+ llvm::SmallPtrSet<const Decl *, 4> EscapedParameters;
llvm::SmallPtrSet<const ValueDecl *, 4> IgnoredDecls;
bool AllEscaped = false;
RecordDecl *GlobalizedRD = nullptr;
llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
void markAsEscaped(const ValueDecl *VD) {
- if (IgnoredDecls.count(VD) ||
- (CGF.CapturedStmtInfo &&
- CGF.CapturedStmtInfo->lookup(cast<VarDecl>(VD))))
+ if (IgnoredDecls.count(VD))
return;
+ // Variables captured by value must be globalized.
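+  // E.g. a scalar captured by value whose address escapes inside the region
+  // (say, it is passed to 'void use(int &)') must get its copy placed in the
+  // globalized record instead of a private alloca.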
+ if (auto *CSI = CGF.CapturedStmtInfo) {
+    if (const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) {
+ if (FD->getType()->isReferenceType())
+ return;
+ EscapedParameters.insert(VD);
+ }
+ }
EscapedDecls.insert(VD);
}
Visit(Child);
}
+  /// Returns the record that handles all the escaped local variables and is
+  /// used instead of their original storage.
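+  /// E.g. if 'int a' and 'double b' escape, the record is roughly
+  /// 'struct { int a; double b; }' and accesses are rewritten to its fields.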
const RecordDecl *getGlobalizedRecord() {
if (!GlobalizedRD)
buildRecordForGlobalizedVars();
return GlobalizedRD;
}
+ /// Returns the field in the globalized record for the escaped variable.
const FieldDecl *getFieldForGlobalizedVar(const ValueDecl *VD) const {
assert(GlobalizedRD &&
"Record for globalized variables must be generated already.");
return I->getSecond();
}
+ /// Returns the list of the escaped local variables/parameters.
ArrayRef<const ValueDecl *> getEscapedDecls() const {
return EscapedDecls.getArrayRef();
}
+
+  /// Returns the set of escaped parameters that are passed by value and thus
+  /// require globalization.
+ const llvm::SmallPtrSetImpl<const Decl *> &getEscapedParameters() const {
+ return EscapedParameters;
+ }
};
} // anonymous namespace
createNVPTXRuntimeFunction(
OMPRTL_NVPTX__kmpc_data_sharing_init_stack));
- const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
- if (I == FunctionGlobalizedDecls.end())
- return;
- const RecordDecl *GlobalizedVarsRecord = I->getSecond().first;
- QualType RecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord);
-
- // Recover pointer to this function's global record. The runtime will
- // handle the specifics of the allocation of the memory.
- // Use actual memory size of the record including the padding
- // for alignment purposes.
- unsigned Alignment =
- CGM.getContext().getTypeAlignInChars(RecTy).getQuantity();
- unsigned GlobalRecordSize =
- CGM.getContext().getTypeSizeInChars(RecTy).getQuantity();
- GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
- // TODO: allow the usage of shared memory to be controlled by
- // the user, for now, default to global.
- llvm::Value *GlobalRecordSizeArg[] = {
- llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
- CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
- llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
- GlobalRecordSizeArg);
- llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
- GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo());
- FunctionToGlobalRecPtr.try_emplace(CGF.CurFn, GlobalRecValue);
-
- // Emit the "global alloca" which is a GEP from the global declaration record
- // using the pointer returned by the runtime.
- LValue Base = CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, RecTy);
- auto &Res = I->getSecond().second;
- for (auto &Rec : Res) {
- const FieldDecl *FD = Rec.second.first;
- LValue VarAddr = CGF.EmitLValueForField(Base, FD);
- Rec.second.second = VarAddr.getAddress();
- }
+ emitGenericVarsProlog(CGF, WST.Loc);
}
void CGOpenMPRuntimeNVPTX::emitGenericEntryFooter(CodeGenFunction &CGF,
EntryFunctionState &EST) {
- const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
- if (I != FunctionGlobalizedDecls.end()) {
- if (!CGF.HaveInsertPoint())
- return;
- auto I = FunctionToGlobalRecPtr.find(CGF.CurFn);
- if (I != FunctionToGlobalRecPtr.end()) {
- llvm::Value *Args[] = {I->getSecond()};
- CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
- Args);
- }
- }
+ emitGenericVarsEpilog(CGF);
+ if (!CGF.HaveInsertPoint())
+ return;
if (!EST.ExitBB)
EST.ExitBB = CGF.createBasicBlock(".exit");
llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+ SourceLocation Loc = D.getLocStart();
+
+  // Wrap the parallel region codegen with the globalization prolog/epilog.
+ class NVPTXPrePostActionTy : public PrePostActionTy {
+ SourceLocation &Loc;
+
+ public:
+ NVPTXPrePostActionTy(SourceLocation &Loc) : Loc(Loc) {}
+ void Enter(CodeGenFunction &CGF) override {
+ static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
+ .emitGenericVarsProlog(CGF, Loc);
+ }
+ void Exit(CodeGenFunction &CGF) override {
+ static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
+ .emitGenericVarsEpilog(CGF);
+ }
+ } Action(Loc);
+ CodeGen.setAction(Action);
auto *OutlinedFun =
cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen));
llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+ SourceLocation Loc = D.getLocStart();
+  // Wrap the teams region codegen with the globalization prolog/epilog.
+ class NVPTXPrePostActionTy : public PrePostActionTy {
+ SourceLocation &Loc;
+
+ public:
+ NVPTXPrePostActionTy(SourceLocation &Loc) : Loc(Loc) {}
+ void Enter(CodeGenFunction &CGF) override {
+ static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
+ .emitGenericVarsProlog(CGF, Loc);
+ }
+ void Exit(CodeGenFunction &CGF) override {
+ static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
+ .emitGenericVarsEpilog(CGF);
+ }
+ } Action(Loc);
+ CodeGen.setAction(Action);
llvm::Value *OutlinedFunVal = CGOpenMPRuntime::emitTeamsOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen);
llvm::Function *OutlinedFun = cast<llvm::Function>(OutlinedFunVal);
return OutlinedFun;
}
+void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
+ SourceLocation Loc) {
+ CGBuilderTy &Bld = CGF.Builder;
+
+ const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
+ if (I == FunctionGlobalizedDecls.end())
+ return;
+ const RecordDecl *GlobalizedVarsRecord = I->getSecond().GlobalRecord;
+ QualType RecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord);
+
+ // Recover pointer to this function's global record. The runtime will
+ // handle the specifics of the allocation of the memory.
+ // Use actual memory size of the record including the padding
+ // for alignment purposes.
+ unsigned Alignment =
+ CGM.getContext().getTypeAlignInChars(RecTy).getQuantity();
+ unsigned GlobalRecordSize =
+ CGM.getContext().getTypeSizeInChars(RecTy).getQuantity();
+ GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
+ // TODO: allow the usage of shared memory to be controlled by
+ // the user, for now, default to global.
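+  // The emitted call has the form (cf. the CHECK lines in the new test):
+  //   %rec = call i8* @__kmpc_data_sharing_push_stack(i64 <record size>, i16 0)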
+ llvm::Value *GlobalRecordSizeArg[] = {
+ llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
+ CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
+ llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
+ GlobalRecordSizeArg);
+ llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo());
+ LValue Base = CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, RecTy);
+ I->getSecond().GlobalRecordAddr = GlobalRecValue;
+
+ // Emit the "global alloca" which is a GEP from the global declaration record
+ // using the pointer returned by the runtime.
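+  // Per escaped variable this yields a field access of the form:
+  //   %fld = getelementptr inbounds %struct.T, %struct.T* %rec, i32 0, i32 <idx>
+  // For escaped parameters the incoming value is loaded and stored into the
+  // field, and MappedParams redirects all later uses to that field address.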
+ for (auto &Rec : I->getSecond().LocalVarData) {
+ bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first);
+    llvm::Value *ParValue = nullptr;
+ if (EscapedParam) {
+ const auto *VD = cast<VarDecl>(Rec.first);
+ LValue ParLVal =
+ CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
+ ParValue = CGF.EmitLoadOfScalar(ParLVal, Loc);
+ }
+ const FieldDecl *FD = Rec.second.first;
+ LValue VarAddr = CGF.EmitLValueForField(Base, FD);
+ Rec.second.second = VarAddr.getAddress();
+ if (EscapedParam) {
+ const auto *VD = cast<VarDecl>(Rec.first);
+ CGF.EmitStoreOfScalar(ParValue, VarAddr);
+ I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress());
+ }
+ }
+ I->getSecond().MappedParams->apply(CGF);
+}
+
+void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF) {
+ const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
+ if (I != FunctionGlobalizedDecls.end() && I->getSecond().GlobalRecordAddr) {
+ I->getSecond().MappedParams->restore(CGF);
+ if (!CGF.HaveInsertPoint())
+ return;
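+    // Emits: call void @__kmpc_data_sharing_pop_stack(i8* <record>)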
+ CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
+ I->getSecond().GlobalRecordAddr);
+ }
+}
+
void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
SourceLocation Loc,
llvm::Value *PtrV = Bld.CreateBitCast(V, CGF.VoidPtrTy);
CGF.EmitStoreOfScalar(PtrV, Dst, /*Volatile=*/false,
Ctx.getPointerType(Ctx.VoidPtrTy));
- Idx++;
+ ++Idx;
}
}
continue;
}
llvm::Value *TargetArg = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- NativeArg, NativeArg->getType()->getPointerElementType()->getPointerTo(
- /*AddrSpace=*/0));
+ NativeArg,
+ NativeArg->getType()->getPointerElementType()->getPointerTo());
TargetArgs.emplace_back(
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TargetArg, TargetType));
}
auto *Fn = llvm::Function::Create(
CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
OutlinedParallelFn->getName() + "_wrapper", &CGM.getModule());
- CGM.SetInternalFunctionAttributes(/*D=*/GlobalDecl(), Fn, CGFI);
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
"Function is registered already.");
SmallVector<const ValueDecl *, 4> IgnoredDecls;
const Stmt *Body = nullptr;
+ bool NeedToDelayGlobalization = false;
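+  // For OpenMP captured regions the globalization prolog/epilog are emitted
+  // by the actions set up in the outlined-function emitters, not here.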
if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
Body = FD->getBody();
} else if (const auto *BD = dyn_cast<BlockDecl>(D)) {
} else if (const auto *CD = dyn_cast<CapturedDecl>(D)) {
Body = CD->getBody();
if (CGF.CapturedStmtInfo->getKind() == CR_OpenMP) {
+ NeedToDelayGlobalization = true;
if (const auto *CS = dyn_cast<CapturedStmt>(Body)) {
IgnoredDecls.reserve(CS->capture_size());
for (const auto &Capture : CS->captures())
const RecordDecl *GlobalizedVarsRecord = VarChecker.getGlobalizedRecord();
if (!GlobalizedVarsRecord)
return;
- auto &Res =
- FunctionGlobalizedDecls
- .try_emplace(CGF.CurFn, GlobalizedVarsRecord, DeclToAddrMapTy())
- .first->getSecond()
- .second;
+ auto I = FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;
+ I->getSecond().MappedParams =
+ llvm::make_unique<CodeGenFunction::OMPMapVars>();
+ I->getSecond().GlobalRecord = GlobalizedVarsRecord;
+ I->getSecond().EscapedParameters.insert(
+ VarChecker.getEscapedParameters().begin(),
+ VarChecker.getEscapedParameters().end());
+ DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
- Res.insert(std::make_pair(VD, std::make_pair(FD, Address::invalid())));
+ Data.insert(std::make_pair(VD, std::make_pair(FD, Address::invalid())));
+ }
+ if (!NeedToDelayGlobalization) {
+ emitGenericVarsProlog(CGF, D->getLocStart());
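+    // Cleanup that pops the globalized stack record on both normal and EH
+    // exits from the function.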
+ struct GlobalizationScope final : EHScopeStack::Cleanup {
+ GlobalizationScope() = default;
+
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
+ static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
+ .emitGenericVarsEpilog(CGF);
+ }
+ };
+ CGF.EHStack.pushCleanup<GlobalizationScope>(NormalAndEHCleanup);
}
}
auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
if (I == FunctionGlobalizedDecls.end())
return Address::invalid();
- auto VDI = I->getSecond().second.find(VD);
- if (VDI == I->getSecond().second.end())
+ auto VDI = I->getSecond().LocalVarData.find(VD);
+ if (VDI == I->getSecond().LocalVarData.end())
return Address::invalid();
return VDI->second.second;
}
void CGOpenMPRuntimeNVPTX::functionFinished(CodeGenFunction &CGF) {
- FunctionToGlobalRecPtr.erase(CGF.CurFn);
FunctionGlobalizedDecls.erase(CGF.CurFn);
CGOpenMPRuntime::functionFinished(CGF);
}
/// function.
void emitGenericEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
+ /// Helper for generic variables globalization prolog.
+ void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc);
+
+ /// Helper for generic variables globalization epilog.
+ void emitGenericVarsEpilog(CodeGenFunction &CGF);
+
/// \brief Helper for Spmd mode target directive's entry function.
void emitSpmdEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
const OMPExecutableDirective &D);
/// The map of local variables to their addresses in the global memory.
using DeclToAddrMapTy = llvm::MapVector<const Decl *,
std::pair<const FieldDecl *, Address>>;
+  /// Set of parameters passed by value that escape the OpenMP context.
+ using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>;
+  struct FunctionData {
+    /// Escaped local variables with their record fields and addresses.
+    DeclToAddrMapTy LocalVarData;
+    /// Record type that holds the globalized variables of the function.
+    const RecordDecl *GlobalRecord = nullptr;
+    /// Pointer returned by __kmpc_data_sharing_push_stack for the record.
+    llvm::Value *GlobalRecordAddr = nullptr;
+    /// Escaped parameters that are passed by value.
+    EscapedParamsTy EscapedParameters;
+    /// Remapping of escaped parameters to their globalized addresses.
+    std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams;
+  };
/// Maps the function to the data required for globalization: the record of
/// globalized variables, their addresses, and the escaped parameters.
- llvm::DenseMap<llvm::Function *,
- std::pair<const RecordDecl *, DeclToAddrMapTy>>
- FunctionGlobalizedDecls;
- /// Map from function to global record pointer.
- llvm::DenseMap<llvm::Function *, llvm::Value *> FunctionToGlobalRecPtr;
+ llvm::SmallDenseMap<llvm::Function *, FunctionData> FunctionGlobalizedDecls;
};
} // CodeGen namespace.
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
// Emit parallel region as a standalone region.
- auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
OMPPrivateScope PrivateScope(CGF);
bool Copyins = CGF.EmitOMPCopyinClause(S);
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
CGF.EmitOMPPrivateClause(S, PrivateScope);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
+ Action.Enter(CGF);
CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
};
const OMPLoopDirective &S,
CodeGenFunction::JumpDest LoopExit) {
auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ PrePostActionTy &Action) {
+ Action.Enter(CGF);
bool HasCancel = false;
if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
const OMPParallelForDirective &S) {
// Emit directive as a combined directive that consists of two implicit
// directives: 'parallel' with 'for' directive.
- auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
emitDispatchForLoopBounds);
const OMPParallelForSimdDirective &S) {
// Emit directive as a combined directive that consists of two implicit
// directives: 'parallel' with 'for' directive.
- auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
emitDispatchForLoopBounds);
};
const OMPParallelSectionsDirective &S) {
// Emit directive as a combined directive that consists of two implicit
// directives: 'parallel' with 'sections' directive.
- auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
CGF.EmitSections(S);
};
emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
// Emit teams region as a standalone region.
- auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
OMPPrivateScope PrivateScope(CGF);
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
CGF.EmitOMPPrivateClause(S, PrivateScope);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
+ Action.Enter(CGF);
CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
};
// Emit teams region as a standalone region.
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ PrePostActionTy &Action) {
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
+ Action.Enter(CGF);
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
CodeGenDistribute);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
// Emit teams region as a standalone region.
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ PrePostActionTy &Action) {
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
+ Action.Enter(CGF);
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
CodeGenDistribute);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
// Emit teams region as a standalone region.
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ PrePostActionTy &Action) {
OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
+ Action.Enter(CGF);
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
CodeGenDistribute);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
// Emit teams region as a standalone region.
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ PrePostActionTy &Action) {
OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
+ Action.Enter(CGF);
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
CodeGenDistribute);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
// Emit teams region as a standalone region.
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ PrePostActionTy &Action) {
OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
+ Action.Enter(CGF);
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
CodeGenDistribute);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
// Emit teams region as a standalone region.
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ PrePostActionTy &Action) {
OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
+ Action.Enter(CGF);
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
// Emit teams region as a standalone region.
auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ PrePostActionTy &Action) {
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
+ Action.Enter(CGF);
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
// Emit teams region as a standalone region.
auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ PrePostActionTy &Action) {
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
+ Action.Enter(CGF);
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
// Get the captured statement associated with the 'parallel' region.
auto *CS = S.getCapturedStmt(OMPD_parallel);
Action.Enter(CGF);
- auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
CGF.EmitOMPPrivateClause(S, PrivateScope);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
+ Action.Enter(CGF);
// TODO: Add support for clauses.
CGF.EmitStmt(CS->getCapturedStmt());
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
Action.Enter(CGF);
// Emit directive as a combined directive that consists of two implicit
// directives: 'parallel' with 'for' directive.
- auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
CodeGenFunction::OMPCancelStackRAII CancelRegion(
CGF, OMPD_target_parallel_for, S.hasCancel());
CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
Action.Enter(CGF);
// Emit directive as a combined directive that consists of two implicit
// directives: 'parallel' with 'for' directive.
- auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
emitDispatchForLoopBounds);
};
--- /dev/null
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// expected-no-diagnostics
+
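+// Checks that escaped locals ('b' in the parallel region, 'a' in bar()) are
+// allocated via the data-sharing stack, while foo(), which only takes a
+// reference, and the target entry itself need no push/pop.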
+int foo(int &a) { return a; }
+
+int bar() {
+ int a;
+ return foo(a);
+}
+
+// CHECK: define void @__omp_offloading_{{.*}}maini1{{.*}}_l[[@LINE+5]](i32* dereferenceable{{.*}})
+// CHECK-NOT: @__kmpc_data_sharing_push_stack
+
+int maini1() {
+ int a;
+#pragma omp target parallel map(from:a)
+ {
+ int b;
+ a = foo(b) + bar();
+ }
+ return a;
+}
+
+// parallel region
+// CHECK: define {{.*}}void @{{.*}}(i32* noalias {{.*}}, i32* noalias {{.*}}, i32* dereferenceable{{.*}})
+// CHECK: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 0)
+// CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_ST:%struct[.].*]]*
+// CHECK: [[B_ADDR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[GLOBALS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: call {{.*}}[[FOO:@.*foo.*]](i32* dereferenceable{{.*}} [[B_ADDR]])
+// CHECK: call {{.*}}[[BAR:@.*bar.*]]()
+// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[RES]])
+// CHECK: ret void
+
+// CHECK: define {{.*}}[[FOO]](i32* dereferenceable{{.*}})
+// CHECK-NOT: @__kmpc_data_sharing_push_stack
+
+// CHECK: define {{.*}}[[BAR]]()
+// CHECK: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 0)
+// CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_ST:%struct[.].*]]*
+// CHECK: [[A_ADDR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[GLOBALS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: call {{.*}}[[FOO]](i32* dereferenceable{{.*}} [[A_ADDR]])
+// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[RES]])
+// CHECK: ret i32
// CK1: store {{.+}} 0, {{.+}},
// CK1: store i{{[0-9]+}} [[ARGC]], i{{[0-9]+}}* [[ARGCADDR]],
// CK1-64: [[CONV:%.+]] = bitcast i{{[0-9]+}}* [[ARGCADDR]] to i{{[0-9]+}}*
-// CK1-64: store i{{[0-9]+}}* [[CONV]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
-// CK1-32: store i{{[0-9]+}}* [[ARGCADDR]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
+// CK1: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 4, i16 0)
+// CK1-64: [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV]]
+// CK1-32: [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[ARGCADDR]]
+// CK1: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CK1: store i{{[0-9]+}} [[ARG]], i{{[0-9]+}}* [[ARGCADDR]],
+// CK1: store i{{[0-9]+}}* [[ARGCADDR]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
// CK1: [[ARGCADDR_PTR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[ARGCADDR_PTR]],
// CK1: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[ARGCADDR_PTR_REF]],
// CK1-NOT: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(
// CK1: [[ARGCADDR_PTR:%.+]] = alloca i{{.+}}***,
// CK1: [[ARGCADDR:%.+]] = alloca i{{.+}}**,
// CK1: store i{{.+}}** [[ARGC]], i{{.+}}*** [[ARGCADDR]]
+// CK1: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} {{4|8}}, i16 0)
+// CK1: [[ARG:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** [[ARGCADDR]]
+// CK1: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CK1: store i{{[0-9]+}}** [[ARG]], i{{[0-9]+}}*** [[ARGCADDR]],
// CK1: store i8*** [[ARGCADDR]], i8**** [[ARGCADDR_PTR]],
// CK1: [[ARGCADDR_PTR_REF:%.+]] = load i{{.+}}**, i{{.+}}*** [[ARGCADDR_PTR]],
// CK1: store i{{[0-9]+}}** null, i{{[0-9]+}}*** [[ARGCADDR_PTR_REF]],
// CK2-64: [[ACONV:%.+]] = bitcast i64* [[AADDR]] to i32*
// CK2-64: [[BCONV:%.+]] = bitcast i64* [[BADDR]] to i32*
// CK2-64: [[CONV:%.+]] = bitcast i64* [[ARGCADDR]] to i32*
-// CK2-64: store i{{[0-9]+}}* [[CONV]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
-// CK2-32: store i{{[0-9]+}}* [[ARGCADDR]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
+// CK2: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 4, i16 0)
+// CK2-64: [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV]]
+// CK2-32: [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[ARGCADDR]]
+// CK2: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CK2: store i{{[0-9]+}} [[ARG]], i{{[0-9]+}}* [[ARGCADDR]],
+// CK2: store i{{[0-9]+}}* [[ARGCADDR]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
// CK2: [[ARGCADDR_PTR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[ARGCADDR_PTR]],
// CK2: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[ARGCADDR_PTR_REF]],
// CK2-NOT: {{.+}} = call i32 @__kmpc_push_num_teams(
// CK2: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[AADDR]],
// CK2: store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[BADDR]],
// CK2: store i{{[0-9]+}}** [[ARGC]], i{{[0-9]+}}*** [[ARGCADDR]],
+// CK2: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} {{4|8}}, i16 0)
+// CK2: [[ARG:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** [[ARGCADDR]]
+// CK2: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CK2: store i{{[0-9]+}}** [[ARG]], i{{[0-9]+}}*** [[ARGCADDR]],
// CK2: store i{{[0-9]+}}*** [[ARGCADDR]], i{{[0-9]+}}**** [[ARGCADDR_PTR]],
// CK2: [[ARGCADDR_PTR_REF:%.+]] = load i{{[0-9]+}}***, i{{[0-9]+}}**** [[ARGCADDR_PTR]],
// CK2: store i{{[0-9]+}}** null, i{{[0-9]+}}*** [[ARGCADDR_PTR_REF]],