From 8f0271771bd348fc5a8ebfd9a30d9f67e9d298ea Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 22 May 2015 08:56:35 +0000 Subject: [PATCH] [OPENMP] Prepare codegen for privates in tasks for non-capturing of privates in CapturedStmt. Reworked codegen for privates in tasks: call @kmpc_omp_task_alloc(); ... call @kmpc_omp_task(task_proxy); void map_privates(.privates_rec. *privs, type1 ** priv1_ref, ..., typen **privn_ref) { *priv1_ref = &privs->private1; ... *privn_ref = &privs->privaten; ret void } i32 task_entry(i32 ThreadId, i32 PartId, void* privs, void (void*, ...) map_privates, shareds* captures) { type1 **priv1; ... typen **privn; call map_privates(privs, priv1, ..., privn); . ret i32 } i32 task_proxy(i32 ThreadId, kmp_task_t_with_privates *tt) { call task_entry(ThreadId, tt->task_data.PartId, &tt->privates, map_privates, tt->task_data.shareds); } git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@238010 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CGOpenMPRuntime.cpp | 160 ++++++++++++++++++---- lib/CodeGen/CGStmtOpenMP.cpp | 104 +++++++++----- lib/Sema/SemaOpenMP.cpp | 9 ++ test/OpenMP/task_firstprivate_codegen.cpp | 121 ++++++++-------- test/OpenMP/task_private_codegen.cpp | 125 +++++++++-------- 5 files changed, 343 insertions(+), 176 deletions(-) diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 789302c885..1238accf42 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1719,7 +1719,8 @@ createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, /// argument. /// \code /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { -/// TaskFunction(gtid, tt->part_id, tt->shareds); +/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, +/// tt->shareds); /// return 0; /// } /// \endcode @@ -1727,7 +1728,8 @@ static llvm::Value * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, - QualType SharedsPtrTy, llvm::Value *TaskFunction) { + QualType SharedsPtrTy, llvm::Value *TaskFunction, + llvm::Value *TaskPrivatesMap) { auto &C = CGM.getContext(); FunctionArgList Args; ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); @@ -1748,18 +1750,19 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, CGF.disableDebugInfo(); CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); - // TaskFunction(gtid, tt->task_data.part_id, tt->task_data.shareds); + // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, + // tt->task_data.shareds); auto *GtidParam = CGF.EmitLoadOfScalar( CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc); auto *TaskTypeArgAddr = CGF.Builder.CreateAlignedLoad( CGF.GetAddrOfLocalVar(&TaskTypeArg), CGM.PointerAlignInBytes); - LValue Base = + LValue TDBase = CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskTWithPrivatesQTy); auto *KmpTaskTWithPrivatesQTyRD = cast(KmpTaskTWithPrivatesQTy->getAsTagDecl()); - Base = - CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); + LValue Base = + CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); auto *KmpTaskTQTyRD = cast(KmpTaskTQTy->getAsTagDecl()); auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); @@ -1771,7 +1774,18 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(), CGF.ConvertTypeForMem(SharedsPtrTy)); - llvm::Value *CallArgs[] = {GtidParam, PartidParam, SharedsParam}; + auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); + llvm::Value *PrivatesParam; + if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { + auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); + PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + PrivatesLVal.getAddress(), CGF.VoidPtrTy); + } else { + PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + } + + llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam, + TaskPrivatesMap, SharedsParam}; CGF.EmitCallOrInvoke(TaskFunction, CallArgs); CGF.EmitStoreThroughLValue( RValue::get(CGF.Builder.getInt32(/*C=*/0)), @@ -1825,6 +1839,90 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, return DestructorFn; } +/// \brief Emit a privates mapping function for correct handling of private and +/// firstprivate variables. +/// \code +/// void .omp_task_privates_map.(const .privates. *noalias privs, +/// **noalias priv1,..., **noalias privn) { +/// *priv1 = &.privates.priv1; +/// ...; +/// *privn = &.privates.privn; +/// } +/// \endcode +static llvm::Value * +emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, + const ArrayRef PrivateVars, + const ArrayRef FirstprivateVars, + QualType PrivatesQTy, + const ArrayRef Privates) { + auto &C = CGM.getContext(); + FunctionArgList Args; + ImplicitParamDecl TaskPrivatesArg( + C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.getPointerType(PrivatesQTy).withConst().withRestrict()); + Args.push_back(&TaskPrivatesArg); + llvm::DenseMap PrivateVarsPos; + unsigned Counter = 1; + for (auto *E: PrivateVars) { + Args.push_back(ImplicitParamDecl::Create( + C, /*DC=*/nullptr, Loc, + /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) + .withConst() + .withRestrict())); + auto *VD = cast(cast(E)->getDecl()); + PrivateVarsPos[VD] = Counter; + ++Counter; + } + for (auto *E : FirstprivateVars) { + Args.push_back(ImplicitParamDecl::Create( + C, /*DC=*/nullptr, Loc, + /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) + .withConst() + .withRestrict())); + auto *VD = cast(cast(E)->getDecl()); + PrivateVarsPos[VD] = Counter; + ++Counter; + } + FunctionType::ExtInfo Info; + auto &TaskPrivatesMapFnInfo = + CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info, + /*isVariadic=*/false); + auto *TaskPrivatesMapTy = + CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); + auto *TaskPrivatesMap = llvm::Function::Create( + TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, + ".omp_task_privates_map.", &CGM.getModule()); + CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskPrivatesMapFnInfo, + TaskPrivatesMap); + TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); + CodeGenFunction CGF(CGM); + CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, + TaskPrivatesMapFnInfo, Args); + + // *privi = &.privates.privi; + auto *TaskPrivatesArgAddr = CGF.Builder.CreateAlignedLoad( + CGF.GetAddrOfLocalVar(&TaskPrivatesArg), CGM.PointerAlignInBytes); + LValue Base = + CGF.MakeNaturalAlignAddrLValue(TaskPrivatesArgAddr, PrivatesQTy); + auto *PrivatesQTyRD = cast(PrivatesQTy->getAsTagDecl()); + Counter = 0; + for (auto *Field : PrivatesQTyRD->fields()) { + auto FieldLVal = CGF.EmitLValueForField(Base, Field); + auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; + auto RefLVal = CGF.MakeNaturalAlignAddrLValue(CGF.GetAddrOfLocalVar(VD), + VD->getType()); + auto RefLoadRVal = CGF.EmitLoadOfLValue(RefLVal, Loc); + CGF.EmitStoreOfScalar( + FieldLVal.getAddress(), + CGF.MakeNaturalAlignAddrLValue(RefLoadRVal.getScalarVal(), + RefLVal.getType()->getPointeeType())); + ++Counter; + } + CGF.FinishFunction(); + return TaskPrivatesMap; +} + static int array_pod_sort_comparator(const PrivateDataTy *P1, const PrivateDataTy *P2) { return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0); @@ -1841,7 +1939,7 @@ void CGOpenMPRuntime::emitTaskCall( const ArrayRef FirstprivateInits) { auto &C = CGM.getContext(); llvm::SmallVector Privates; - // Aggeregate privates and sort them by the alignment. + // Aggregate privates and sort them by the alignment. auto I = PrivateCopies.begin(); for (auto *E : PrivateVars) { auto *VD = cast(cast(E)->getDecl()); @@ -1885,11 +1983,27 @@ void CGOpenMPRuntime::emitTaskCall( CGM.getSize(C.getTypeSizeInChars(KmpTaskTWithPrivatesQTy)); QualType SharedsPtrTy = C.getPointerType(SharedsTy); + // Emit initial values for private copies (if any). + llvm::Value *TaskPrivatesMap = nullptr; + auto *TaskPrivatesMapTy = + std::next(cast(TaskFunction)->getArgumentList().begin(), + 3) + ->getType(); + if (!Privates.empty()) { + auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); + TaskPrivatesMap = emitTaskPrivateMappingFunction( + CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates); + TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + TaskPrivatesMap, TaskPrivatesMapTy); + } else { + TaskPrivatesMap = llvm::ConstantPointerNull::get( + cast(TaskPrivatesMapTy)); + } // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, // kmp_task_t *tt); auto *TaskEntry = emitProxyTaskFunction( CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy, - KmpTaskTQTy, SharedsPtrTy, TaskFunction); + KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap); // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, @@ -1937,10 +2051,13 @@ void CGOpenMPRuntime::emitTaskCall( auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); auto PrivatesBase = CGF.EmitLValueForField(Base, *FI); FI = cast(FI->getType()->getAsTagDecl())->field_begin(); - LValue SharedsBase = CGF.MakeNaturalAlignAddrLValue( - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), - SharedsTy); + LValue SharedsBase; + if (!FirstprivateVars.empty()) { + SharedsBase = CGF.MakeNaturalAlignAddrLValue( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), + SharedsTy); + } CodeGenFunction::CGCapturedStmtInfo CapturesInfo( cast(*D.getAssociatedStmt())); for (auto &&Pair : Privates) { @@ -2000,22 +2117,7 @@ void CGOpenMPRuntime::emitTaskCall( } } NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType(); - // Copy addresses of privates to corresponding references in the list of - // captured variables. - // ... - // tt->shareds.var_addr = &tt->privates.private_var; - // ... - auto *OriginalVD = Pair.second.Original; - auto *SharedField = CapturesInfo.lookup(OriginalVD); - auto SharedRefLValue = - CGF.EmitLValueForFieldInitialization(SharedsBase, SharedField); - CGF.EmitStoreThroughLValue( - RValue::get(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - PrivateLValue.getAddress(), SharedRefLValue.getAddress() - ->getType() - ->getPointerElementType())), - SharedRefLValue); - ++FI, ++I; + ++FI; } } // Provide pointer to function with destructors for privates. diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index 78412e4734..07fc6e966a 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -1434,39 +1434,9 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { auto *PartId = std::next(I); // The first function argument for tasks is a thread id, the second one is a // part id (0 for tied tasks, >=0 for untied task). - auto &&CodeGen = [PartId, &S](CodeGenFunction &CGF) { - if (*PartId) { - // TODO: emit code for untied tasks. - } - CGF.EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); - }; - auto OutlinedFn = - CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, CodeGen); - // Check if we should emit tied or untied task. - bool Tied = !S.getSingleClause(OMPC_untied); - // Check if the task is final - llvm::PointerIntPair Final; - if (auto *Clause = S.getSingleClause(OMPC_final)) { - // If the condition constant folds and can be elided, try to avoid emitting - // the condition and the dead arm of the if/else. - auto *Cond = cast(Clause)->getCondition(); - bool CondConstant; - if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) - Final.setInt(CondConstant); - else - Final.setPointer(EvaluateExprAsBool(Cond)); - } else { - // By default the task is not final. - Final.setInt(/*IntVal=*/false); - } - auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); - const Expr *IfCond = nullptr; - if (auto C = S.getSingleClause(OMPC_if)) { - IfCond = cast(C)->getCondition(); - } llvm::DenseSet EmittedAsPrivate; // Get list of private variables. - llvm::SmallVector Privates; + llvm::SmallVector PrivateVars; llvm::SmallVector PrivateCopies; for (auto &&I = S.getClausesOfKind(OMPC_private); I; ++I) { auto *C = cast(*I); @@ -1474,7 +1444,7 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { for (auto *IInit : C->private_copies()) { auto *OrigVD = cast(cast(*IRef)->getDecl()); if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { - Privates.push_back(*IRef); + PrivateVars.push_back(*IRef); PrivateCopies.push_back(IInit); } ++IRef; @@ -1499,9 +1469,77 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { ++IRef, ++IElemInitRef; } } + auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars]( + CodeGenFunction &CGF) { + // Set proper addresses for generated private copies. + auto *CS = cast(S.getAssociatedStmt()); + OMPPrivateScope Scope(CGF); + if (!PrivateVars.empty() || !FirstprivateVars.empty()) { + auto *CopyFn = CGF.Builder.CreateAlignedLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)), + CGF.PointerAlignInBytes); + auto *PrivatesPtr = CGF.Builder.CreateAlignedLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)), + CGF.PointerAlignInBytes); + // Map privates. + llvm::SmallVector, 16> + PrivatePtrs; + llvm::SmallVector CallArgs; + CallArgs.push_back(PrivatesPtr); + for (auto *E : PrivateVars) { + auto *VD = cast(cast(E)->getDecl()); + auto *PrivatePtr = + CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); + PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); + CallArgs.push_back(PrivatePtr); + } + for (auto *E : FirstprivateVars) { + auto *VD = cast(cast(E)->getDecl()); + auto *PrivatePtr = + CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); + PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); + CallArgs.push_back(PrivatePtr); + } + CGF.EmitRuntimeCall(CopyFn, CallArgs); + for (auto &&Pair : PrivatePtrs) { + auto *Replacement = + CGF.Builder.CreateAlignedLoad(Pair.second, CGF.PointerAlignInBytes); + Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); + } + } + (void)Scope.Privatize(); + if (*PartId) { + // TODO: emit code for untied tasks. + } + CGF.EmitStmt(CS->getCapturedStmt()); + }; + auto OutlinedFn = + CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, CodeGen); + // Check if we should emit tied or untied task. + bool Tied = !S.getSingleClause(OMPC_untied); + // Check if the task is final + llvm::PointerIntPair Final; + if (auto *Clause = S.getSingleClause(OMPC_final)) { + // If the condition constant folds and can be elided, try to avoid emitting + // the condition and the dead arm of the if/else. + auto *Cond = cast(Clause)->getCondition(); + bool CondConstant; + if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) + Final.setInt(CondConstant); + else + Final.setPointer(EvaluateExprAsBool(Cond)); + } else { + // By default the task is not final. + Final.setInt(/*IntVal=*/false); + } + auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); + const Expr *IfCond = nullptr; + if (auto C = S.getSingleClause(OMPC_if)) { + IfCond = cast(C)->getCondition(); + } CGM.getOpenMPRuntime().emitTaskCall( *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy, - CapturedStruct, IfCond, Privates, PrivateCopies, FirstprivateVars, + CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars, FirstprivateCopies, FirstprivateInits); } diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp index ee6bdb21e5..cfe8db3d65 100644 --- a/lib/Sema/SemaOpenMP.cpp +++ b/lib/Sema/SemaOpenMP.cpp @@ -1211,9 +1211,18 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { } case OMPD_task: { QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1); + QualType Args[] = {Context.VoidPtrTy.withConst().withRestrict()}; + FunctionProtoType::ExtProtoInfo EPI; + EPI.Variadic = true; + QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI); Sema::CapturedParamNameType Params[] = { std::make_pair(".global_tid.", KmpInt32Ty), std::make_pair(".part_id.", KmpInt32Ty), + std::make_pair(".privates.", + Context.VoidPtrTy.withConst().withRestrict()), + std::make_pair( + ".copy_fn.", + Context.getPointerType(CopyFnType).withConst().withRestrict()), std::make_pair(StringRef(), QualType()) // __context with shared vars }; ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, diff --git a/test/OpenMP/task_firstprivate_codegen.cpp b/test/OpenMP/task_firstprivate_codegen.cpp index 449be6815a..e1358d9ecc 100644 --- a/test/OpenMP/task_firstprivate_codegen.cpp +++ b/test/OpenMP/task_firstprivate_codegen.cpp @@ -63,8 +63,6 @@ int main() { // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_ADDR_REF]] // LAMBDA: [[G_VAL:%.+]] = load volatile double, double* [[G_REF]] // LAMBDA: store volatile double [[G_VAL]], double* [[G_PRIVATE_ADDR]] -// LAMBDA: [[G_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 0 -// LAMBDA: store double* [[G_PRIVATE_ADDR]], double** [[G_ADDR_REF]], // LAMBDA: call i32 @__kmpc_omp_task(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) // LAMBDA: ret #pragma omp task firstprivate(g) @@ -76,6 +74,7 @@ int main() { // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]] // LAMBDA: store volatile double 2.0{{.+}}, double* [[G_REF]] + // LAMBDA: store double* %{{.+}}, double** %{{.+}}, // LAMBDA: define internal i32 [[TASK_ENTRY]](i32, %{{.+}}*) g = 1; // LAMBDA: store volatile double 1.0{{.+}}, double* %{{.+}}, @@ -100,8 +99,6 @@ int main() { // BLOCKS: [[G_REF:%.+]] = load double*, double** [[G_ADDR_REF]] // BLOCKS: [[G_VAL:%.+]] = load volatile double, double* [[G_REF]] // BLOCKS: store volatile double [[G_VAL]], double* [[G_PRIVATE_ADDR]] - // BLOCKS: [[G_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 0 - // BLOCKS: store double* [[G_PRIVATE_ADDR]], double** [[G_ADDR_REF]], // BLOCKS: call i32 @__kmpc_omp_task(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) // BLOCKS: ret #pragma omp task firstprivate(g) @@ -112,6 +109,7 @@ int main() { // BLOCKS-NOT: [[G]]{{[[^:word:]]}} // BLOCKS: ret + // BLOCKS: store double* %{{.+}}, double** %{{.+}}, // BLOCKS: define internal i32 [[TASK_ENTRY]](i32, %{{.+}}*) g = 1; // BLOCKS: store volatile double 1.0{{.+}}, double* %{{.+}}, @@ -187,10 +185,6 @@ int main() { // CHECK: [[VAR_REF:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[VAR_ADDR_REF]], // CHECK: call void [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]* [[PRIVATE_VAR_REF]], [[S_DOUBLE_TY]]* {{.*}}[[VAR_REF]], -// shareds->var_addr = &kmp_task_t->privates.var; -// CHECK: [[VAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 3 -// CHECK: store [[S_DOUBLE_TY]]* [[PRIVATE_VAR_REF]], [[S_DOUBLE_TY]]** [[VAR_ADDR_REF]], - // s_arr; // CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 // CHECK: [[S_ARR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 2 @@ -201,10 +195,6 @@ int main() { // CHECK: icmp eq // CHECK: br i1 -// shareds->s_arr_addr = &kmp_task_t->privates.s_arr; -// CHECK: [[S_ARR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 2 -// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[PRIVATE_S_ARR_REF]], [2 x [[S_DOUBLE_TY]]]** [[S_ARR_ADDR_REF]], - // t_var; // CHECK: [[PRIVATE_T_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 // CHECK: [[T_VAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 1 @@ -212,19 +202,11 @@ int main() { // CHECK: [[T_VAR:%.+]] = load i{{.+}}, i{{.+}}* [[T_VAR_REF]], // CHECK: store i32 [[T_VAR]], i32* [[PRIVATE_T_VAR_REF]], -// shareds->t_var_addr = &kmp_task_t->privates.t_var; -// CHECK: [[T_VAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 1 -// CHECK: store i32* [[PRIVATE_T_VAR_REF]], i32** [[T_VAR_ADDR_REF]], - // vec; // CHECK: [[PRIVATE_VEC_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 // CHECK: [[VEC_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 0 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64( -// shareds->vec_addr = &kmp_task_t->privates.vec; -// CHECK: [[VEC_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 0 -// CHECK: store [2 x i32]* [[PRIVATE_VEC_REF]], [2 x i32]** [[VEC_ADDR_REF]], - // Provide pointer to destructor function, which will destroy private variables at the end of the task. // CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3 // CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_REF]], @@ -242,18 +224,42 @@ int main() { // CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 // CHECK: ret // + +// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_MAIN_TY]]* noalias, [[S_DOUBLE_TY]]** noalias, i32** noalias, [2 x [[S_DOUBLE_TY]]]** noalias, [2 x i32]** noalias) +// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_MAIN_TY]]*, [[PRIVATES_MAIN_TY]]** +// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 0 +// CHECK: [[ARG1:%.+]] = load [[S_DOUBLE_TY]]**, [[S_DOUBLE_TY]]*** {{.+}}, +// CHECK: store [[S_DOUBLE_TY]]* [[PRIV_VAR]], [[S_DOUBLE_TY]]** [[ARG1]], +// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 1 +// CHECK: [[ARG3:%.+]] = load [2 x [[S_DOUBLE_TY]]]**, [2 x [[S_DOUBLE_TY]]]*** %{{.+}}, +// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[PRIV_S_VAR]], [2 x [[S_DOUBLE_TY]]]** [[ARG3]], +// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 2 +// CHECK: [[ARG2:%.+]] = load i32**, i32*** %{{.+}}, +// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG2]], +// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 3 +// CHECK: [[ARG4:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}}, +// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG4]], +// CHECK: ret void + // CHECK: define internal i32 [[TASK_ENTRY]](i32, [[KMP_TASK_MAIN_TY]]*) -// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[SHAREDS_ADDR_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 0 -// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_ADDR_REF]], -// CHECK: [[SHAREDS:%.+]] = bitcast i8* [[SHAREDS_REF]] to [[CAP_MAIN_TY]]* +// CHECK: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]]*, +// CHECK: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, +// CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, +// CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, +// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], +// CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]]) +// CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], +// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], +// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_DOUBLE_TY]]]*, [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], +// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]], // Privates actually are used. -// CHECK-DAG: getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{.+}} 0, i{{.+}} 0 -// CHECK-DAG: getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{.+}} 0, i{{.+}} 1 -// CHECK-DAG: getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{.+}} 0, i{{.+}} 2 -// CHECK-DAG: getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{.+}} 0, i{{.+}} 3 +// CHECK-DAG: [[PRIV_VAR]] +// CHECK-DAG: [[PRIV_T_VAR]] +// CHECK-DAG: [[PRIV_S_ARR]] +// CHECK-DAG: [[PRIV_VEC]] // CHECK: ret @@ -317,19 +323,11 @@ int main() { // CHECK: [[T_VAR:%.+]] = load i{{.+}}, i{{.+}}* [[T_VAR_REF]], // CHECK: store i32 [[T_VAR]], i32* [[PRIVATE_T_VAR_REF]], -// shareds->t_var_addr = &kmp_task_t->privates.t_var; -// CHECK: [[T_VAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 1 -// CHECK: store i32* [[PRIVATE_T_VAR_REF]], i32** [[T_VAR_ADDR_REF]], - // vec; // CHECK: [[PRIVATE_VEC_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 // CHECK: [[VEC_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 0 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64( -// shareds->vec_addr = &kmp_task_t->privates.vec; -// CHECK: [[VEC_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 0 -// CHECK: store [2 x i32]* [[PRIVATE_VEC_REF]], [2 x i32]** [[VEC_ADDR_REF]], - // Constructors for s_arr and var. // a_arr; // CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 @@ -341,19 +339,11 @@ int main() { // CHECK: icmp eq // CHECK: br i1 -// shareds->s_arr_addr = &kmp_task_t->privates.s_arr; -// CHECK: [[S_ARR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 2 -// CHECK: store [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR_REF]], - // var; // CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 // CHECK: [[VAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 3 // CHECK: call void [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* [[PRIVATE_VAR_REF]], -// shareds->var_addr = &kmp_task_t->privates.var; -// CHECK: [[VAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 3 -// CHECK: store [[S_INT_TY]]* [[PRIVATE_VAR_REF]], [[S_INT_TY]]** [[VAR_ADDR_REF]], - // Provide pointer to destructor function, which will destroy private variables at the end of the task. // CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3 // CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_REF]], @@ -369,19 +359,42 @@ int main() { // CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 // CHECK: ret // + +// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_TMAIN_TY]]* noalias, i32** noalias, [2 x i32]** noalias, [2 x [[S_INT_TY]]]** noalias, [[S_INT_TY]]** noalias) +// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_TMAIN_TY]]*, [[PRIVATES_TMAIN_TY]]** +// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 0 +// CHECK: [[ARG1:%.+]] = load i32**, i32*** %{{.+}}, +// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG1]], +// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 1 +// CHECK: [[ARG2:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}}, +// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG2]], +// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 2 +// CHECK: [[ARG3:%.+]] = load [2 x [[S_INT_TY]]]**, [2 x [[S_INT_TY]]]*** %{{.+}}, +// CHECK: store [2 x [[S_INT_TY]]]* [[PRIV_S_VAR]], [2 x [[S_INT_TY]]]** [[ARG3]], +// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 3 +// CHECK: [[ARG4:%.+]] = load [[S_INT_TY]]**, [[S_INT_TY]]*** {{.+}}, +// CHECK: store [[S_INT_TY]]* [[PRIV_VAR]], [[S_INT_TY]]** [[ARG4]], +// CHECK: ret void + // CHECK: define internal i32 [[TASK_ENTRY]](i32, [[KMP_TASK_TMAIN_TY]]*) -// Substitute addresses of shared variables in capture struct by address of private copies from kmp_task_t. -// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[SHAREDS_ADDR_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 0 -// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_ADDR_REF]], -// CHECK: [[SHAREDS:%.+]] = bitcast i8* [[SHAREDS_REF]] to [[CAP_TMAIN_TY]]* +// CHECK: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, +// CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, +// CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, +// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], +// CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) +// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], +// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]], +// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], +// CHECK: [[PRIV_VAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[PRIV_VAR_ADDR]], // Privates actually are used. -// CHECK-DAG: getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{.+}} 0, i{{.+}} 0 -// CHECK-DAG: getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{.+}} 0, i{{.+}} 1 -// CHECK-DAG: getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{.+}} 0, i{{.+}} 2 -// CHECK-DAG: getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{.+}} 0, i{{.+}} 3 +// CHECK-DAG: [[PRIV_VAR]] +// CHECK-DAG: [[PRIV_T_VAR]] +// CHECK-DAG: [[PRIV_S_ARR]] +// CHECK-DAG: [[PRIV_VEC]] // CHECK: ret @@ -410,9 +423,9 @@ struct St { void array_func(int n, float a[n], St s[2]) { // ARRAY: call i8* @__kmpc_omp_task_alloc( +// ARRAY: call i32 @__kmpc_omp_task( // ARRAY: store float** %{{.+}}, float*** %{{.+}}, // ARRAY: store %struct.St** %{{.+}}, %struct.St*** %{{.+}}, -// ARRAY: call i32 @__kmpc_omp_task( #pragma omp task firstprivate(a, s) ; } diff --git a/test/OpenMP/task_private_codegen.cpp b/test/OpenMP/task_private_codegen.cpp index e6f11d3e85..3a9ed7c58d 100644 --- a/test/OpenMP/task_private_codegen.cpp +++ b/test/OpenMP/task_private_codegen.cpp @@ -58,8 +58,6 @@ int main() { // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 40, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) // LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 -// LAMBDA: [[G_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 0 -// LAMBDA: store double* [[G_PRIVATE_ADDR]], double** [[G_ADDR_REF]], // LAMBDA: call i32 @__kmpc_omp_task(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) // LAMBDA: ret #pragma omp task private(g) @@ -91,8 +89,6 @@ int main() { // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 40, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) // BLOCKS: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 - // BLOCKS: [[G_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 0 - // BLOCKS: store double* [[G_PRIVATE_ADDR]], double** [[G_ADDR_REF]], // BLOCKS: call i32 @__kmpc_omp_task(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) // BLOCKS: ret #pragma omp task private(g) @@ -167,17 +163,12 @@ int main() { // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). // Also copy address of private copy to the corresponding shareds reference. // CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 -// CHECK: [[SHAREDS:%.+]] = bitcast i8* [[SHAREDS_REF]] to [[CAP_MAIN_TY]]* // Constructors for s_arr and var. // var; // CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 // CHECK: call void [[S_DOUBLE_TY_DEF_CONSTR]]([[S_DOUBLE_TY]]* [[PRIVATE_VAR_REF:%.+]]) -// shareds->var_addr = &kmp_task_t->privates.var; -// CHECK: [[VAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 3 -// CHECK: store [[S_DOUBLE_TY]]* [[PRIVATE_VAR_REF]], [[S_DOUBLE_TY]]** [[VAR_ADDR_REF]], - // a_arr; // CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 // CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 @@ -187,20 +178,6 @@ int main() { // CHECK: icmp eq // CHECK: br i1 -// shareds->s_arr_addr = &kmp_task_t->privates.s_arr; -// CHECK: [[S_ARR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 2 -// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[PRIVATE_S_ARR_REF]], [2 x [[S_DOUBLE_TY]]]** [[S_ARR_ADDR_REF]], - -// shareds->t_var_addr = &kmp_task_t->privates.t_var; -// CHECK: [[PRIVATE_T_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 -// CHECK: [[T_VAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 1 -// CHECK: store i32* [[PRIVATE_T_VAR_REF]], i32** [[T_VAR_ADDR_REF]], - -// shareds->vec_addr = &kmp_task_t->privates.vec; -// CHECK: [[PRIVATE_VEC_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 -// CHECK: [[VEC_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 0 -// CHECK: store [2 x i32]* [[PRIVATE_VEC_REF]], [2 x i32]** [[VEC_ADDR_REF]], - // Provide pointer to destructor function, which will destroy private variables at the end of the task. // CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3 // CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_REF]], @@ -218,18 +195,42 @@ int main() { // CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 // CHECK: ret // + +// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_MAIN_TY]]* noalias, [[S_DOUBLE_TY]]** noalias, i32** noalias, [2 x [[S_DOUBLE_TY]]]** noalias, [2 x i32]** noalias) +// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_MAIN_TY]]*, [[PRIVATES_MAIN_TY]]** +// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 0 +// CHECK: [[ARG1:%.+]] = load [[S_DOUBLE_TY]]**, [[S_DOUBLE_TY]]*** {{.+}}, +// CHECK: store [[S_DOUBLE_TY]]* [[PRIV_VAR]], [[S_DOUBLE_TY]]** [[ARG1]], +// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 1 +// CHECK: [[ARG3:%.+]] = load [2 x [[S_DOUBLE_TY]]]**, [2 x [[S_DOUBLE_TY]]]*** %{{.+}}, +// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[PRIV_S_VAR]], [2 x [[S_DOUBLE_TY]]]** [[ARG3]], +// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 2 +// CHECK: [[ARG2:%.+]] = load i32**, i32*** %{{.+}}, +// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG2]], +// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 3 +// CHECK: [[ARG4:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}}, +// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG4]], +// CHECK: ret void + // CHECK: define internal i32 [[TASK_ENTRY]](i32, [[KMP_TASK_MAIN_TY]]*) -// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[SHAREDS_ADDR_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 0 -// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_ADDR_REF]], -// CHECK: [[SHAREDS:%.+]] = bitcast i8* [[SHAREDS_REF]] to [[CAP_MAIN_TY]]* +// CHECK: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]]*, +// CHECK: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, +// CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, +// CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, +// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], +// CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]]) +// CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], +// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], +// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_DOUBLE_TY]]]*, [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], +// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]], // Privates actually are used. -// CHECK-DAG: getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{.+}} 0, i{{.+}} 0 -// CHECK-DAG: getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{.+}} 0, i{{.+}} 1 -// CHECK-DAG: getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{.+}} 0, i{{.+}} 2 -// CHECK-DAG: getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{.+}} 0, i{{.+}} 3 +// CHECK-DAG: [[PRIV_VAR]] +// CHECK-DAG: [[PRIV_T_VAR]] +// CHECK-DAG: [[PRIV_S_ARR]] +// CHECK-DAG: [[PRIV_VEC]] // CHECK: ret @@ -283,17 +284,6 @@ int main() { // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). // CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 -// CHECK: [[SHAREDS:%.+]] = bitcast i8* [[SHAREDS_REF]] to [[CAP_TMAIN_TY]]* - -// shareds->t_var_addr = &kmp_task_t->privates.t_var; -// CHECK: [[PRIVATE_T_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 -// CHECK: [[T_VAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 1 -// CHECK: store i32* [[PRIVATE_T_VAR_REF]], i32** [[T_VAR_ADDR_REF]], - -// shareds->vec_addr = &kmp_task_t->privates.vec; -// CHECK: [[PRIVATE_VEC_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 -// CHECK: [[VEC_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 0 -// CHECK: store [2 x i32]* [[PRIVATE_VEC_REF]], [2 x i32]** [[VEC_ADDR_REF]], // Constructors for s_arr and var. // a_arr; @@ -305,18 +295,10 @@ int main() { // CHECK: icmp eq // CHECK: br i1 -// shareds->s_arr_addr = &kmp_task_t->privates.s_arr; -// CHECK: [[S_ARR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 2 -// CHECK: store [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR_REF]], - // var; // CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 // CHECK: call void [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[PRIVATE_VAR_REF:%.+]]) -// shareds->var_addr = &kmp_task_t->privates.var; -// CHECK: [[VAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 3 -// CHECK: store [[S_INT_TY]]* [[PRIVATE_VAR_REF]], [[S_INT_TY]]** [[VAR_ADDR_REF]], - // Provide pointer to destructor function, which will destroy private variables at the end of the task. // CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3 // CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_REF]], @@ -332,19 +314,42 @@ int main() { // CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 // CHECK: ret // + +// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_TMAIN_TY]]* noalias, i32** noalias, [2 x i32]** noalias, [2 x [[S_INT_TY]]]** noalias, [[S_INT_TY]]** noalias) +// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_TMAIN_TY]]*, [[PRIVATES_TMAIN_TY]]** +// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 0 +// CHECK: [[ARG1:%.+]] = load i32**, i32*** %{{.+}}, +// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG1]], +// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 1 +// CHECK: [[ARG2:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}}, +// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG2]], +// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 2 +// CHECK: [[ARG3:%.+]] = load [2 x [[S_INT_TY]]]**, [2 x [[S_INT_TY]]]*** %{{.+}}, +// CHECK: store [2 x [[S_INT_TY]]]* [[PRIV_S_VAR]], [2 x [[S_INT_TY]]]** [[ARG3]], +// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 3 +// CHECK: [[ARG4:%.+]] = load [[S_INT_TY]]**, [[S_INT_TY]]*** {{.+}}, +// CHECK: store [[S_INT_TY]]* [[PRIV_VAR]], [[S_INT_TY]]** [[ARG4]], +// CHECK: ret void + // CHECK: define internal i32 [[TASK_ENTRY]](i32, [[KMP_TASK_TMAIN_TY]]*) -// Substitute addresses of shared variables in capture struct by address of private copies from kmp_task_t. -// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[SHAREDS_ADDR_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 0 -// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_ADDR_REF]], -// CHECK: [[SHAREDS:%.+]] = bitcast i8* [[SHAREDS_REF]] to [[CAP_TMAIN_TY]]* +// CHECK: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, +// CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, +// CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, +// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], +// CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) +// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], +// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]], +// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], +// CHECK: [[PRIV_VAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[PRIV_VAR_ADDR]], // Privates actually are used. -// CHECK-DAG: getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{.+}} 0, i{{.+}} 0 -// CHECK-DAG: getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{.+}} 0, i{{.+}} 1 -// CHECK-DAG: getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{.+}} 0, i{{.+}} 2 -// CHECK-DAG: getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{.+}} 0, i{{.+}} 3 +// CHECK-DAG: [[PRIV_VAR]] +// CHECK-DAG: [[PRIV_T_VAR]] +// CHECK-DAG: [[PRIV_S_ARR]] +// CHECK-DAG: [[PRIV_VEC]] // CHECK: ret @@ -373,9 +378,9 @@ struct St { void array_func(int n, float a[n], St s[2]) { // ARRAY: call i8* @__kmpc_omp_task_alloc( +// ARRAY: call i32 @__kmpc_omp_task( // ARRAY: store float** %{{.+}}, float*** %{{.+}}, // ARRAY: store %struct.St** %{{.+}}, %struct.St*** %{{.+}}, -// ARRAY: call i32 @__kmpc_omp_task( #pragma omp task private(a, s) ; } -- 2.50.1