From f7f94e7dea4db4d58acc46dcde52c95139e0fa32 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 19 Apr 2016 16:36:01 +0000 Subject: [PATCH] Revert "[OPENMP] Codegen for untied tasks." This reverts commit r266754. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@266755 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CGOpenMPRuntime.cpp | 151 ++++------------------ lib/CodeGen/CGOpenMPRuntime.h | 20 +-- lib/CodeGen/CGStmtOpenMP.cpp | 84 ++++++------ lib/Sema/SemaOpenMP.cpp | 11 +- test/OpenMP/task_codegen.cpp | 47 +------ test/OpenMP/task_firstprivate_codegen.cpp | 10 +- test/OpenMP/task_private_codegen.cpp | 9 +- 7 files changed, 92 insertions(+), 240 deletions(-) diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 828be92d54..7d12c3620d 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -72,8 +72,6 @@ public: /// \return LValue for thread id variable. This LValue always has type int32*. virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); - virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} - CGOpenMPRegionKind getRegionKind() const { return RegionKind; } OpenMPDirectiveKind getDirectiveKind() const { return Kind; } @@ -84,8 +82,6 @@ public: return Info->getKind() == CR_OpenMP; } - ~CGOpenMPRegionInfo() override = default; - protected: CGOpenMPRegionKind RegionKind; RegionCodeGenTy CodeGen; @@ -94,7 +90,7 @@ protected: }; /// \brief API for captured statement code generation in OpenMP constructs. -class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { +class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo { public: CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, const RegionCodeGenTy &CodeGen, @@ -125,62 +121,14 @@ private: }; /// \brief API for captured statement code generation in OpenMP constructs. -class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { +class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo { public: - class UntiedTaskActionTy final : public PrePostActionTy { - bool Untied; - const VarDecl *PartIDVar; - const RegionCodeGenTy &UntiedCodeGen; - llvm::SwitchInst *UntiedSwitch = nullptr; - - public: - UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, - const RegionCodeGenTy &UntiedCodeGen) - : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} - void Enter(CodeGenFunction &CGF) override { - if (Untied) { - // Emit task switching point. - auto PartIdLVal = CGF.EmitLoadOfPointerLValue( - CGF.GetAddrOfLocalVar(PartIDVar), - PartIDVar->getType()->castAs()); - auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation()); - auto *DoneBB = CGF.createBasicBlock(".untied.done."); - UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); - CGF.EmitBlock(DoneBB); - CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); - CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); - UntiedSwitch->addCase(CGF.Builder.getInt32(0), - CGF.Builder.GetInsertBlock()); - emitUntiedSwitch(CGF); - } - } - void emitUntiedSwitch(CodeGenFunction &CGF) const { - if (Untied) { - auto PartIdLVal = CGF.EmitLoadOfPointerLValue( - CGF.GetAddrOfLocalVar(PartIDVar), - PartIDVar->getType()->castAs()); - CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), - PartIdLVal); - UntiedCodeGen(CGF); - CodeGenFunction::JumpDest CurPoint = - CGF.getJumpDestInCurrentScope(".untied.next."); - CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); - CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); - UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), - CGF.Builder.GetInsertBlock()); - CGF.EmitBranchThroughCleanup(CurPoint); - CGF.EmitBlock(CurPoint.getBlock()); - } - } - unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } - }; CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, const RegionCodeGenTy &CodeGen, - OpenMPDirectiveKind Kind, bool HasCancel, - const UntiedTaskActionTy &Action) + OpenMPDirectiveKind Kind, bool HasCancel) : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), - ThreadIDVar(ThreadIDVar), Action(Action) { + ThreadIDVar(ThreadIDVar) { assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); } @@ -194,10 +142,6 @@ public: /// \brief Get the name of the capture helper. StringRef getHelperName() const override { return ".omp_outlined."; } - void emitUntiedSwitch(CodeGenFunction &CGF) override { - Action.emitUntiedSwitch(CGF); - } - static bool classof(const CGCapturedStmtInfo *Info) { return CGOpenMPRegionInfo::classof(Info) && cast(Info)->getRegionKind() == @@ -208,8 +152,6 @@ private: /// \brief A variable or parameter storing global thread id for OpenMP /// constructs. const VarDecl *ThreadIDVar; - /// Action for emitting code for untied tasks. - const UntiedTaskActionTy &Action; }; /// \brief API for inlined captured statement code generation in OpenMP @@ -268,11 +210,6 @@ public: llvm_unreachable("No helper name for inlined OpenMP construct"); } - void emitUntiedSwitch(CodeGenFunction &CGF) override { - if (OuterRegionInfo) - OuterRegionInfo->emitUntiedSwitch(CGF); - } - CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } static bool classof(const CGCapturedStmtInfo *Info) { @@ -280,8 +217,6 @@ public: cast(Info)->getRegionKind() == InlinedRegion; } - ~CGOpenMPInlinedRegionInfo() override = default; - private: /// \brief CodeGen info about outer OpenMP region. CodeGenFunction::CGCapturedStmtInfo *OldCSI; @@ -293,7 +228,7 @@ private: /// captured fields. The name of the target region has to be unique in a given /// application so it is provided by the client, because only the client has /// the information to generate that. -class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { +class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo { public: CGOpenMPTargetRegionInfo(const CapturedStmt &CS, const RegionCodeGenTy &CodeGen, StringRef HelperName) @@ -322,7 +257,7 @@ static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { } /// \brief API for generation of expressions captured in a innermost OpenMP /// region. -class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { +class CGOpenMPInnerExprInfo : public CGOpenMPInlinedRegionInfo { public: CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, @@ -822,36 +757,16 @@ llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction( llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - const VarDecl *PartIDVar, const VarDecl *TaskTVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, - bool Tied, unsigned &NumberOfParts) { - auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, - PrePostActionTy &) { - auto *ThreadID = getThreadID(CGF, D.getLocStart()); - auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart()); - llvm::Value *TaskArgs[] = { - UpLoc, ThreadID, - CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), - TaskTVar->getType()->castAs()) - .getPointer()}; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); - }; - CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, - UntiedCodeGen); - CodeGen.setAction(Action); + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { assert(!ThreadIDVar->getType()->isPointerType() && "thread id variable must be of type kmp_int32 for tasks"); auto *CS = cast(D.getAssociatedStmt()); CodeGenFunction CGF(CGM, true); - CGOpenMPTaskOutlinedRegionInfo CGInfo( - *CS, ThreadIDVar, CodeGen, InnermostKind, - cast(D).hasCancel(), Action); + CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, + InnermostKind, + cast(D).hasCancel()); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - auto *Res = CGF.GenerateCapturedStmtFunction(*CS); - CodeGen.clearAction(); - if (!Tied) - NumberOfParts = Action.getNumberOfParts(); - return Res; + return CGF.GenerateCapturedStmtFunction(*CS); } Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { @@ -1983,8 +1898,6 @@ void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); - if (auto *Region = dyn_cast_or_null(CGF.CapturedStmtInfo)) - Region->emitUntiedSwitch(CGF); } void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, @@ -3038,7 +2951,7 @@ createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, /// argument. /// \code /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { -/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, +/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, /// tt->shareds); /// return 0; /// } @@ -3069,7 +2982,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, - // tt, tt->task_data.shareds); + // tt->task_data.shareds); auto *GtidParam = CGF.EmitLoadOfScalar( CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); LValue TDBase = CGF.EmitLoadOfPointerLValue( @@ -3082,7 +2995,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, auto *KmpTaskTQTyRD = cast(KmpTaskTQTy->getAsTagDecl()); auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); - auto *PartidParam = PartIdLVal.getPointer(); + auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal(); auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); @@ -3101,11 +3014,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, } llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam, - TaskPrivatesMap, - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - TDBase.getAddress(), CGF.VoidPtrTy) - .getPointer(), - SharedsParam}; + TaskPrivatesMap, SharedsParam}; CGF.EmitCallOrInvoke(TaskFunction, CallArgs); CGF.EmitStoreThroughLValue( RValue::get(CGF.Builder.getInt32(/*C=*/0)), @@ -3245,8 +3154,8 @@ static int array_pod_sort_comparator(const PrivateDataTy *P1, void CGOpenMPRuntime::emitTaskCall( CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, bool Tied, llvm::PointerIntPair Final, - unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy, - Address Shareds, const Expr *IfCond, ArrayRef PrivateVars, + llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, + const Expr *IfCond, ArrayRef PrivateVars, ArrayRef PrivateCopies, ArrayRef FirstprivateVars, ArrayRef FirstprivateCopies, @@ -3481,8 +3390,7 @@ void CGOpenMPRuntime::emitTaskCall( KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), ArrayType::Normal, /*IndexTypeQuals=*/0); // kmp_depend_info[] deps; - DependenciesArray = - CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); + DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy); for (unsigned i = 0; i < NumDependencies; ++i) { const Expr *E = Dependences[i].second; auto Addr = CGF.EmitLValue(E); @@ -3540,6 +3448,8 @@ void CGOpenMPRuntime::emitTaskCall( // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() // libcall. + // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t + // *new_task); // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence @@ -3557,25 +3467,18 @@ void CGOpenMPRuntime::emitTaskCall( DepTaskArgs[5] = CGF.Builder.getInt32(0); DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } - auto &&ThenCodeGen = [this, Tied, Loc, NumberOfParts, TDBase, KmpTaskTQTyRD, - NumDependencies, &TaskArgs, + auto &&ThenCodeGen = [NumDependencies, &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { - if (!Tied) { - auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); - auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); - CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); - } + // TODO: add check for untied tasks. + auto &RT = CGF.CGM.getOpenMPRuntime(); if (NumDependencies) { CGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); + RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), + DepTaskArgs); } else { - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), + CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); } - // Check if parent region is untied and build return for untied task; - if (auto *Region = - dyn_cast_or_null(CGF.CapturedStmtInfo)) - Region->emitUntiedSwitch(CGF); }; llvm::Value *DepWaitTaskArgs[6]; @@ -4136,8 +4039,6 @@ void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; // Ignore return result until untied tasks are supported. CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); - if (auto *Region = dyn_cast_or_null(CGF.CapturedStmtInfo)) - Region->emitUntiedSwitch(CGF); } void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index 3b384bec99..019461842d 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -82,7 +82,6 @@ public: Callback(CallbackFn::type>), PrePostAction(nullptr) {} void setAction(PrePostActionTy &Action) const { PrePostAction = &Action; } - void clearAction() const { PrePostAction = nullptr; } void operator()(CodeGenFunction &CGF) const; }; @@ -456,25 +455,17 @@ public: OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen); /// \brief Emits outlined function for the OpenMP task directive \a D. This - /// outlined function has type void(*)(kmp_int32 ThreadID, struct task_t* - /// TaskT). + /// outlined function has type void(*)(kmp_int32 ThreadID, kmp_int32 + /// PartID, struct context_vars*). /// \param D OpenMP directive. /// \param ThreadIDVar Variable for thread id in the current OpenMP region. - /// \param PartIDVar Variable for partition id in the current OpenMP untied - /// task region. - /// \param TaskTVar Variable for task_t argument. /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. - /// \param Tied true if task is generated for tied task, false otherwise. - /// \param NumberOfParts Number of parts in untied task. Ignored for tied - /// tasks. /// virtual llvm::Value *emitTaskOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - const VarDecl *PartIDVar, const VarDecl *TaskTVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, - bool Tied, unsigned &NumberOfParts); + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen); /// \brief Cleans up references to the objects in finished function. /// @@ -740,7 +731,6 @@ public: /// \param Tied true if the task is tied (the task is tied to the thread that /// can suspend its task region), false - untied (the task is not tied to any /// thread). - /// \param NumberOfParts Number of parts for untied task. /// \param Final Contains either constant bool value, or llvm::Value * of i1 /// type for final clause. If the value is true, the task forces all of its /// child tasks to become final and included tasks. @@ -767,8 +757,8 @@ public: virtual void emitTaskCall( CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, bool Tied, llvm::PointerIntPair Final, - unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy, - Address Shareds, const Expr *IfCond, ArrayRef PrivateVars, + llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, + const Expr *IfCond, ArrayRef PrivateVars, ArrayRef PrivateCopies, ArrayRef FirstprivateVars, ArrayRef FirstprivateCopies, diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index 3dfef3b0ed..92c05eabdb 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -2244,7 +2244,6 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { auto CapturedStruct = GenerateCapturedStmtArgument(*CS); auto *I = CS->getCapturedDecl()->param_begin(); auto *PartId = std::next(I); - auto *TaskT = std::next(I, 4); // The first function argument for tasks is a thread id, the second one is a // part id (0 for tied tasks, >=0 for untied task). llvm::DenseSet EmittedAsPrivate; @@ -2289,52 +2288,53 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); } } - auto &&CodeGen = [&S, &PrivateVars, &FirstprivateVars]( - CodeGenFunction &CGF, PrePostActionTy &Action) { - OMPPrivateScope Scope(CGF); + auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars]( + CodeGenFunction &CGF, PrePostActionTy &) { // Set proper addresses for generated private copies. auto *CS = cast(S.getAssociatedStmt()); - if (!PrivateVars.empty() || !FirstprivateVars.empty()) { - auto *CopyFn = CGF.Builder.CreateLoad( - CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); - auto *PrivatesPtr = CGF.Builder.CreateLoad( - CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); - // Map privates. - llvm::SmallVector, 16> PrivatePtrs; - llvm::SmallVector CallArgs; - CallArgs.push_back(PrivatesPtr); - for (auto *E : PrivateVars) { - auto *VD = cast(cast(E)->getDecl()); - Address PrivatePtr = CGF.CreateMemTemp( - CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); - PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); - CallArgs.push_back(PrivatePtr.getPointer()); - } - for (auto *E : FirstprivateVars) { - auto *VD = cast(cast(E)->getDecl()); - Address PrivatePtr = - CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), - ".firstpriv.ptr.addr"); - PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); - CallArgs.push_back(PrivatePtr.getPointer()); + { + OMPPrivateScope Scope(CGF); + if (!PrivateVars.empty() || !FirstprivateVars.empty()) { + auto *CopyFn = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); + auto *PrivatesPtr = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); + // Map privates. + llvm::SmallVector, 16> PrivatePtrs; + llvm::SmallVector CallArgs; + CallArgs.push_back(PrivatesPtr); + for (auto *E : PrivateVars) { + auto *VD = cast(cast(E)->getDecl()); + Address PrivatePtr = + CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); + PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); + CallArgs.push_back(PrivatePtr.getPointer()); + } + for (auto *E : FirstprivateVars) { + auto *VD = cast(cast(E)->getDecl()); + Address PrivatePtr = + CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); + PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); + CallArgs.push_back(PrivatePtr.getPointer()); + } + CGF.EmitRuntimeCall(CopyFn, CallArgs); + for (auto &&Pair : PrivatePtrs) { + Address Replacement(CGF.Builder.CreateLoad(Pair.second), + CGF.getContext().getDeclAlign(Pair.first)); + Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); + } } - CGF.EmitRuntimeCall(CopyFn, CallArgs); - for (auto &&Pair : PrivatePtrs) { - Address Replacement(CGF.Builder.CreateLoad(Pair.second), - CGF.getContext().getDeclAlign(Pair.first)); - Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); + (void)Scope.Privatize(); + if (*PartId) { + // TODO: emit code for untied tasks. } + CGF.EmitStmt(CS->getCapturedStmt()); } - (void)Scope.Privatize(); - - Action.Enter(CGF); - CGF.EmitStmt(CS->getCapturedStmt()); }; + auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( + S, *I, OMPD_task, CodeGen); // Check if we should emit tied or untied task. bool Tied = !S.getSingleClause(); - unsigned NumberOfParts; - auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( - S, *I, *PartId, *TaskT, OMPD_task, CodeGen, Tied, NumberOfParts); // Check if the task is final llvm::PointerIntPair Final; if (const auto *Clause = S.getSingleClause()) { @@ -2361,9 +2361,9 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { } OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitTaskCall( - *this, S.getLocStart(), S, Tied, Final, NumberOfParts, OutlinedFn, - SharedsTy, CapturedStruct, IfCond, PrivateVars, PrivateCopies, - FirstprivateVars, FirstprivateCopies, FirstprivateInits, Dependences); + *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy, + CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars, + FirstprivateCopies, FirstprivateInits, Dependences); } void CodeGenFunction::EmitOMPTaskyieldDirective( diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp index 01474518a5..3408989d0e 100644 --- a/lib/Sema/SemaOpenMP.cpp +++ b/lib/Sema/SemaOpenMP.cpp @@ -1610,11 +1610,12 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI); Sema::CapturedParamNameType Params[] = { std::make_pair(".global_tid.", KmpInt32Ty), - std::make_pair(".part_id.", Context.getPointerType(KmpInt32Ty)), - std::make_pair(".privates.", Context.VoidPtrTy.withConst()), - std::make_pair(".copy_fn.", - Context.getPointerType(CopyFnType).withConst()), - std::make_pair(".task_t.", Context.VoidPtrTy.withConst()), + std::make_pair(".part_id.", KmpInt32Ty), + std::make_pair(".privates.", + Context.VoidPtrTy.withConst().withRestrict()), + std::make_pair( + ".copy_fn.", + Context.getPointerType(CopyFnType).withConst().withRestrict()), std::make_pair(StringRef(), QualType()) // __context with shared vars }; ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, diff --git a/test/OpenMP/task_codegen.cpp b/test/OpenMP/task_codegen.cpp index 3bf8253f2a..b262745704 100644 --- a/test/OpenMP/task_codegen.cpp +++ b/test/OpenMP/task_codegen.cpp @@ -230,19 +230,6 @@ int main() { a = 4; c = 5; } -// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*)) -// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]{{.*}}* {{%.+}}, i32 0, i32 3 -// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]] -// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]]) -#pragma omp task untied - { - S s1; -#pragma omp task - a = 4; -#pragma omp taskyield - s1 = S(); -#pragma omp taskwait - } return a; } // CHECK: define internal i32 [[TASK_ENTRY1]](i32, [[KMP_TASK_T]]{{.*}}* noalias) @@ -253,42 +240,16 @@ int main() { // CHECK: store i32 10, i32* %{{.+}} // CHECK: define internal i32 [[TASK_ENTRY2]](i32, [[KMP_TASK_T]]{{.*}}* noalias) -// CHECK: store i32 1, i32* [[A_PTR]] +// CHECK: store i32 1, i32* [[A_PTR:@.+]] // CHECK: define internal i32 [[TASK_ENTRY3]](i32, [[KMP_TASK_T]]{{.*}}* noalias) -// CHECK: store i32 2, i32* [[A_PTR]] +// CHECK: store i32 2, i32* [[A_PTR:@.+]] // CHECK: define internal i32 [[TASK_ENTRY4]](i32, [[KMP_TASK_T]]{{.*}}* noalias) -// CHECK: store i32 3, i32* [[A_PTR]] +// CHECK: store i32 3, i32* [[A_PTR:@.+]] // CHECK: define internal i32 [[TASK_ENTRY5]](i32, [[KMP_TASK_T]]{{.*}}* noalias) -// CHECK: store i32 4, i32* [[A_PTR]] +// CHECK: store i32 4, i32* [[A_PTR:@.+]] // CHECK: store i32 5, i32* [[C_PTR:%.+]], align 128 - -// CHECK: define internal i32 -// CHECK: store i32 4, i32* [[A_PTR]] - -// CHECK: define internal i32 [[TASK_ENTRY6]](i32, [[KMP_TASK_T]]{{.*}}* noalias) -// CHECK: switch i32 %{{.+}}, label -// CHECK: load i32*, i32** % -// CHECK: store i32 1, i32* % -// CHECK: call i32 @__kmpc_omp_task(% - -// CHECK: call i8* @__kmpc_omp_task_alloc( -// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** % -// CHECK: call i32 @__kmpc_omp_task(% -// CHECK: load i32*, i32** % -// CHECK: store i32 2, i32* % -// CHECK: call i32 @__kmpc_omp_task(% - -// CHECK: call i32 @__kmpc_omp_taskyield(% -// CHECK: load i32*, i32** % -// CHECK: store i32 3, i32* % -// CHECK: call i32 @__kmpc_omp_task(% - -// CHECK: call i32 @__kmpc_omp_taskwait(% -// CHECK: load i32*, i32** % -// CHECK: store i32 4, i32* % -// CHECK: call i32 @__kmpc_omp_task(% #endif diff --git a/test/OpenMP/task_firstprivate_codegen.cpp b/test/OpenMP/task_firstprivate_codegen.cpp index 8abc603b8d..e2244140d1 100644 --- a/test/OpenMP/task_firstprivate_codegen.cpp +++ b/test/OpenMP/task_firstprivate_codegen.cpp @@ -422,11 +422,11 @@ int main() { // CHECK: ret void // CHECK: define internal i32 [[TASK_ENTRY]](i32, [[KMP_TASK_TMAIN_TY]]* noalias) -// CHECK: alloca i32*, -// CHECK-DAG: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, -// CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, -// CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, -// CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, + +// CHECK: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, +// CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, +// CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, // CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) diff --git a/test/OpenMP/task_private_codegen.cpp b/test/OpenMP/task_private_codegen.cpp index a2dc842326..1455fd11a9 100644 --- a/test/OpenMP/task_private_codegen.cpp +++ b/test/OpenMP/task_private_codegen.cpp @@ -331,11 +331,10 @@ int main() { // CHECK: define internal i32 [[TASK_ENTRY]](i32, [[KMP_TASK_TMAIN_TY]]* noalias) -// CHECK: alloca i32*, -// CHECK-DAG: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, -// CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, -// CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, -// CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, +// CHECK: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, +// CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, +// CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, // CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) -- 2.40.0