From 8ee6f73c6a9cbb31eb06cb7b3fec5be953c92042 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 25 Apr 2016 12:22:29 +0000 Subject: [PATCH] [OPENMP 4.5] Codegen for 'taskloop' directive. The taskloop construct specifies that the iterations of one or more associated loops will be executed in parallel using OpenMP tasks. The iterations are distributed across tasks created by the construct and scheduled to be executed. The next code will be generated for the taskloop directive: #pragma omp taskloop num_tasks(N) lastprivate(j) for( i=0; i<N*GRAIN*STRIDE-1; i+=STRIDE ) { [loop body lost in extraction] } Generated code: task = __kmpc_omp_task_alloc([arguments lost in extraction]); psh = task->shareds; psh->pth_counter = &th_counter; psh->pcounter = &counter; psh->pj = &j; task->lb = 0; task->ub = N*GRAIN*STRIDE-2; task->st = STRIDE; __kmpc_taskloop( NULL, // location gtid, // gtid task, // task structure 1, // if clause value &task->lb, // lower bound &task->ub, // upper bound STRIDE, // loop increment 0, // 1 if nogroup specified 2, // schedule type: 0-none, 1-grainsize, 2-num_tasks N, // schedule value (ignored for type 0) (void*)&__task_dup_entry // tasks duplication routine ); git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@267395 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CGOpenMPRuntime.cpp | 224 +++++++++++++++++++++++++++---- lib/CodeGen/CGOpenMPRuntime.h | 116 ++++++++++++++++ lib/CodeGen/CGStmtOpenMP.cpp | 222 +++++++++++++++++++++++------- lib/CodeGen/CodeGenFunction.h | 20 +++ lib/Sema/SemaOpenMP.cpp | 45 ++++++- test/OpenMP/taskloop_codegen.cpp | 198 +++++++++++++++++++++++++++ 6 files changed, 748 insertions(+), 77 deletions(-) create mode 100644 test/OpenMP/taskloop_codegen.cpp diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index f732131bf5..bfb7f99740 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -608,9 +608,13 @@ enum OpenMPRTLFunction { // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, // kmp_int32 num_teams, kmp_int32 thread_limit); OMPRTL__kmpc_push_num_teams, - /// \brief Call to void 
__kmpc_fork_teams(ident_t *loc, kmp_int32 argc, - /// kmpc_micro microtask, ...); + // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro + // microtask, ...); OMPRTL__kmpc_fork_teams, + // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int + // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int + // sched, kmp_uint64 grainsize, void *task_dup); + OMPRTL__kmpc_taskloop, // // Offloading related calls @@ -842,10 +846,11 @@ llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( assert(!ThreadIDVar->getType()->isPointerType() && "thread id variable must be of type kmp_int32 for tasks"); auto *CS = cast(D.getAssociatedStmt()); + auto *TD = dyn_cast(&D); CodeGenFunction CGF(CGM, true); - CGOpenMPTaskOutlinedRegionInfo CGInfo( - *CS, ThreadIDVar, CodeGen, InnermostKind, - cast(D).hasCancel(), Action); + CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, + InnermostKind, + TD ? TD->hasCancel() : false, Action); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); auto *Res = CGF.GenerateCapturedStmtFunction(*CS); if (!Tied) @@ -1433,6 +1438,26 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); break; } + case OMPRTL__kmpc_taskloop: { + // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int + // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int + // sched, kmp_uint64 grainsize, void *task_dup); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), + CGM.IntTy, + CGM.VoidPtrTy, + CGM.IntTy, + CGM.Int64Ty->getPointerTo(), + CGM.Int64Ty->getPointerTo(), + CGM.Int64Ty, + CGM.IntTy, + CGM.IntTy, + CGM.Int64Ty, + CGM.VoidPtrTy}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); + break; + } case OMPRTL__tgt_target: { // Build int32_t __tgt_target(int32_t 
device_id, void *host_ptr, int32_t // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t @@ -2492,6 +2517,14 @@ enum KmpTaskTFields { KmpTaskTPartId, /// \brief Function with call of destructors for private variables. KmpTaskTDestructors, + /// (Taskloops only) Lower bound. + KmpTaskTLowerBound, + /// (Taskloops only) Upper bound. + KmpTaskTUpperBound, + /// (Taskloops only) Stride. + KmpTaskTStride, + /// (Taskloops only) Is last iteration flag. + KmpTaskTLastIter, }; } // anonymous namespace @@ -2999,7 +3032,8 @@ createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef Privates) { } static RecordDecl * -createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty, +createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, + QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy) { auto &C = CGM.getContext(); // Build struct kmp_task_t { @@ -3007,6 +3041,11 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty, // kmp_routine_entry_t routine; // kmp_int32 part_id; // kmp_routine_entry_t destructors; + // For taskloops additional fields: + // kmp_uint64 lb; + // kmp_uint64 ub; + // kmp_int64 st; + // kmp_int32 liter; // }; auto *RD = C.buildImplicitRecord("kmp_task_t"); RD->startDefinition(); @@ -3014,6 +3053,16 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty, addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); addFieldToRecordDecl(C, RD, KmpInt32Ty); addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); + if (isOpenMPTaskLoopDirective(Kind)) { + QualType KmpUInt64Ty = + CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); + QualType KmpInt64Ty = + CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); + addFieldToRecordDecl(C, RD, KmpUInt64Ty); + addFieldToRecordDecl(C, RD, KmpUInt64Ty); + addFieldToRecordDecl(C, RD, KmpInt64Ty); + addFieldToRecordDecl(C, RD, KmpInt32Ty); + } RD->completeDefinition(); return RD; } @@ -3041,13 +3090,16 @@ 
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, /// \code /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, +/// For taskloops: +/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, /// tt->shareds); /// return 0; /// } /// \endcode static llvm::Value * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, - QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, + OpenMPDirectiveKind Kind, QualType KmpInt32Ty, + QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Value *TaskFunction, llvm::Value *TaskPrivatesMap) { @@ -3071,7 +3123,10 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, - // tt, tt->task_data.shareds); + // tt, + // For taskloops: + // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, + // tt->task_data.shareds); auto *GtidParam = CGF.EmitLoadOfScalar( CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); LValue TDBase = CGF.EmitLoadOfPointerLValue( @@ -3098,16 +3153,37 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( PrivatesLVal.getPointer(), CGF.VoidPtrTy); - } else { + } else PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); - } - llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam, - TaskPrivatesMap, - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - TDBase.getAddress(), CGF.VoidPtrTy) - .getPointer(), - SharedsParam}; + llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, + TaskPrivatesMap, + CGF.Builder + 
.CreatePointerBitCastOrAddrSpaceCast( + TDBase.getAddress(), CGF.VoidPtrTy) + .getPointer()}; + SmallVector CallArgs(std::begin(CommonArgs), + std::end(CommonArgs)); + if (isOpenMPTaskLoopDirective(Kind)) { + auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); + auto LBLVal = CGF.EmitLValueForField(Base, *LBFI); + auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal(); + auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); + auto UBLVal = CGF.EmitLValueForField(Base, *UBFI); + auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal(); + auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); + auto StLVal = CGF.EmitLValueForField(Base, *StFI); + auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal(); + auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); + auto LILVal = CGF.EmitLValueForField(Base, *LIFI); + auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal(); + CallArgs.push_back(LBParam); + CallArgs.push_back(UBParam); + CallArgs.push_back(StParam); + CallArgs.push_back(LIParam); + } + CallArgs.push_back(SharedsParam); + CGF.EmitCallOrInvoke(TaskFunction, CallArgs); CGF.EmitStoreThroughLValue( RValue::get(CGF.Builder.getInt32(/*C=*/0)), @@ -3244,20 +3320,17 @@ static int array_pod_sort_comparator(const PrivateDataTy *P1, return P1->first < P2->first ? 1 : (P2->first < P1->first ? 
-1 : 0); } -void CGOpenMPRuntime::emitTaskCall( +CGOpenMPRuntime::TaskDataTy CGOpenMPRuntime::emitTaskInit( CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, bool Tied, llvm::PointerIntPair Final, unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy, - Address Shareds, const Expr *IfCond, ArrayRef PrivateVars, + Address Shareds, ArrayRef PrivateVars, ArrayRef PrivateCopies, ArrayRef FirstprivateVars, ArrayRef FirstprivateCopies, - ArrayRef FirstprivateInits, - ArrayRef> Dependences) { - if (!CGF.HaveInsertPoint()) - return; + ArrayRef FirstprivateInits) { auto &C = CGM.getContext(); - llvm::SmallVector Privates; + llvm::SmallVector Privates; // Aggregate privates and sort them by the alignment. auto I = PrivateCopies.begin(); for (auto *E : PrivateVars) { @@ -3287,8 +3360,8 @@ void CGOpenMPRuntime::emitTaskCall( emitKmpRoutineEntryT(KmpInt32Ty); // Build type kmp_task_t (if not built yet). if (KmpTaskTQTy.isNull()) { - KmpTaskTQTy = C.getRecordType( - createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy)); + KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( + CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); } auto *KmpTaskTQTyRD = cast(KmpTaskTQTy->getAsTagDecl()); // Build particular struct kmp_task_t for the given task. 
@@ -3321,8 +3394,9 @@ void CGOpenMPRuntime::emitTaskCall( // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, // kmp_task_t *tt); auto *TaskEntry = emitProxyTaskFunction( - CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy, - KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap); + CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, + KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, + TaskPrivatesMap); // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, @@ -3454,7 +3528,38 @@ void CGOpenMPRuntime::emitTaskCall( CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( DestructorFn, KmpRoutineEntryPtrTy), Destructor); + TaskDataTy Data; + Data.NewTask = NewTask; + Data.TaskEntry = TaskEntry; + Data.NewTaskNewTaskTTy = NewTaskNewTaskTTy; + Data.TDBase = TDBase; + Data.KmpTaskTQTyRD = KmpTaskTQTyRD; + return Data; +} +void CGOpenMPRuntime::emitTaskCall( + CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, + bool Tied, llvm::PointerIntPair Final, + unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy, + Address Shareds, const Expr *IfCond, ArrayRef PrivateVars, + ArrayRef PrivateCopies, + ArrayRef FirstprivateVars, + ArrayRef FirstprivateCopies, + ArrayRef FirstprivateInits, + ArrayRef> Dependences) { + if (!CGF.HaveInsertPoint()) + return; + + TaskDataTy Data = + emitTaskInit(CGF, Loc, D, Tied, Final, NumberOfParts, TaskFunction, + SharedsTy, Shareds, PrivateVars, PrivateCopies, + FirstprivateVars, FirstprivateCopies, FirstprivateInits); + llvm::Value *NewTask = Data.NewTask; + llvm::Value *TaskEntry = Data.TaskEntry; + llvm::Value *NewTaskNewTaskTTy = Data.NewTaskNewTaskTTy; + LValue TDBase = Data.TDBase; + RecordDecl *KmpTaskTQTyRD = Data.KmpTaskTQTyRD; + auto &C = CGM.getContext(); // Process list of dependences. 
Address DependenciesArray = Address::invalid(); unsigned NumDependencies = Dependences.size(); @@ -3629,6 +3734,71 @@ void CGOpenMPRuntime::emitTaskCall( } } +void CGOpenMPRuntime::emitTaskLoopCall( + CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, + bool Tied, llvm::PointerIntPair Final, bool Nogroup, + unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy, + Address Shareds, const Expr *IfCond, ArrayRef PrivateVars, + ArrayRef PrivateCopies, + ArrayRef FirstprivateVars, + ArrayRef FirstprivateCopies, + ArrayRef FirstprivateInits) { + if (!CGF.HaveInsertPoint()) + return; + TaskDataTy Data = + emitTaskInit(CGF, Loc, D, Tied, Final, NumberOfParts, TaskFunction, + SharedsTy, Shareds, PrivateVars, PrivateCopies, + FirstprivateVars, FirstprivateCopies, FirstprivateInits); + // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() + // libcall. + // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int + // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int + // sched, kmp_uint64 grainsize, void *task_dup); + llvm::Value *ThreadID = getThreadID(CGF, Loc); + llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *IfVal; + if (IfCond) { + IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, + /*isSigned=*/true); + } else + IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); + + LValue LBLVal = CGF.EmitLValueForField( + Data.TDBase, + *std::next(Data.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); + auto *LBVar = + cast(cast(D.getLowerBoundVariable())->getDecl()); + CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), + /*IsInitializer=*/true); + LValue UBLVal = CGF.EmitLValueForField( + Data.TDBase, + *std::next(Data.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); + auto *UBVar = + cast(cast(D.getUpperBoundVariable())->getDecl()); + CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), 
UBLVal.getQuals(), + /*IsInitializer=*/true); + LValue StLVal = CGF.EmitLValueForField( + Data.TDBase, + *std::next(Data.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); + auto *StVar = + cast(cast(D.getStrideVariable())->getDecl()); + CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), + /*IsInitializer=*/true); + llvm::Value *TaskArgs[] = { + UpLoc, + ThreadID, + Data.NewTask, + IfVal, + LBLVal.getPointer(), + UBLVal.getPointer(), + CGF.EmitLoadOfScalar(StLVal, SourceLocation()), + llvm::ConstantInt::getSigned(CGF.IntTy, Nogroup ? 1 : 0), + llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/0), + llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), + llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); +} + /// \brief Emit reduction operation for each element of array (required for /// array sections) LHS op = RHS. /// \param Type Type of array. diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index a7efd45fd4..028e46dec7 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -14,6 +14,7 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H +#include "CGValue.h" #include "clang/AST/Type.h" #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/SourceLocation.h" @@ -37,6 +38,7 @@ namespace clang { class Expr; class GlobalDecl; class OMPExecutableDirective; +class OMPLoopDirective; class VarDecl; class OMPDeclareReductionDecl; class IdentifierInfo; @@ -431,6 +433,64 @@ private: /// llvm::Value *getCriticalRegionLock(StringRef CriticalName); + struct TaskDataTy { + llvm::Value *NewTask; + llvm::Value *TaskEntry; + llvm::Value *NewTaskNewTaskTTy; + LValue TDBase; + RecordDecl *KmpTaskTQTyRD; + }; + /// Emit task region for the task directive. The task region is emitted in + /// several steps: + /// 1. 
Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 + /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, + /// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the + /// function: + /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { + /// TaskFunction(gtid, tt->part_id, tt->shareds); + /// return 0; + /// } + /// 2. Copy a list of shared variables to field shareds of the resulting + /// structure kmp_task_t returned by the previous call (if any). + /// 3. Copy a pointer to destructions function to field destructions of the + /// resulting structure kmp_task_t. + /// \param D Current task directive. + /// \param Tied true if the task is tied (the task is tied to the thread that + /// can suspend its task region), false - untied (the task is not tied to any + /// thread). + /// \param Final Contains either constant bool value, or llvm::Value * of i1 + /// type for final clause. If the value is true, the task forces all of its + /// child tasks to become final and included tasks. + /// \param NumberOfParts Number of parts in untied tasks. + /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32 + /// /*part_id*/, captured_struct */*__context*/); + /// \param SharedsTy A type which contains references the shared variables. + /// \param Shareds Context with the list of shared variables from the \p + /// TaskFunction. + /// \param IfCond Not a nullptr if 'if' clause was specified, nullptr + /// otherwise. + /// \param PrivateVars List of references to private variables for the task + /// directive. + /// \param PrivateCopies List of private copies for each private variable in + /// \p PrivateVars. + /// \param FirstprivateVars List of references to private variables for the + /// task directive. + /// \param FirstprivateCopies List of private copies for each private variable + /// in \p FirstprivateVars. 
+ /// \param FirstprivateInits List of references to auto generated variables + /// used for initialization of a single array element. Used if firstprivate + /// variable is of array type. + TaskDataTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, + const OMPExecutableDirective &D, bool Tied, + llvm::PointerIntPair Final, + unsigned NumberOfParts, llvm::Value *TaskFunction, + QualType SharedsTy, Address Shareds, + ArrayRef PrivateVars, + ArrayRef PrivateCopies, + ArrayRef FirstprivateVars, + ArrayRef FirstprivateCopies, + ArrayRef FirstprivateInits); + public: explicit CGOpenMPRuntime(CodeGenModule &CGM); virtual ~CGOpenMPRuntime() {} @@ -774,6 +834,62 @@ public: ArrayRef FirstprivateInits, ArrayRef> Dependences); + /// Emit task region for the taskloop directive. The taskloop region is + /// emitted in several steps: + /// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 + /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, + /// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the + /// function: + /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { + /// TaskFunction(gtid, tt->part_id, tt->shareds); + /// return 0; + /// } + /// 2. Copy a list of shared variables to field shareds of the resulting + /// structure kmp_task_t returned by the previous call (if any). + /// 3. Copy a pointer to destructions function to field destructions of the + /// resulting structure kmp_task_t. + /// 4. Emit a call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t + /// *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int + /// nogroup, int sched, kmp_uint64 grainsize, void *task_dup ), where new_task + /// is a resulting structure from + /// previous items. + /// \param D Current task directive. + /// \param Tied true if the task is tied (the task is tied to the thread that + /// can suspend its task region), false - untied (the task is not tied to any + /// thread). 
+ /// \param Final Contains either constant bool value, or llvm::Value * of i1 + /// type for final clause. If the value is true, the task forces all of its + /// child tasks to become final and included tasks. + /// \param Nogroup true if nogroup clause was specified, false otherwise. + /// \param NumberOfParts Number of parts in untied taskloops. + /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32 + /// /*part_id*/, captured_struct */*__context*/); + /// \param SharedsTy A type which contains references the shared variables. + /// \param Shareds Context with the list of shared variables from the \p + /// TaskFunction. + /// \param IfCond Not a nullptr if 'if' clause was specified, nullptr + /// otherwise. + /// \param PrivateVars List of references to private variables for the task + /// directive. + /// \param PrivateCopies List of private copies for each private variable in + /// \p PrivateVars. + /// \param FirstprivateVars List of references to private variables for the + /// task directive. + /// \param FirstprivateCopies List of private copies for each private variable + /// in \p FirstprivateVars. + /// \param FirstprivateInits List of references to auto generated variables + /// used for initialization of a single array element. Used if firstprivate + /// variable is of array type. + virtual void emitTaskLoopCall( + CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, + bool Tied, llvm::PointerIntPair Final, + bool Nogroup, unsigned NumberOfParts, llvm::Value *TaskFunction, + QualType SharedsTy, Address Shareds, const Expr *IfCond, + ArrayRef PrivateVars, ArrayRef PrivateCopies, + ArrayRef FirstprivateVars, + ArrayRef FirstprivateCopies, + ArrayRef FirstprivateInits); + /// \brief Emit code for the directive that does not require outlining. 
/// /// \param InnermostKind Kind of innermost directive (for simple directives it diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index c4f8311f2e..4d9ecf0068 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -1630,8 +1630,8 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, // IV < UB BoolCondVal = EvaluateExprAsBool(S.getCond()); } else { - BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, - IL, LB, UB, ST); + BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL, + LB, UB, ST); } // If there are any cleanups between here and the loop-exit scope, @@ -2280,10 +2280,12 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective( emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen); } -void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { +void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, + const RegionCodeGenTy &BodyGen, + const TaskGenTy &TaskGen, + bool Tied) { // Emit outlined function for task construct. auto CS = cast(S.getAssociatedStmt()); - auto CapturedStruct = GenerateCapturedStmtArgument(*CS); auto *I = CS->getCapturedDecl()->param_begin(); auto *PartId = std::next(I); auto *TaskT = std::next(I, 4); @@ -2291,52 +2293,44 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { // part id (0 for tied tasks, >=0 for untied task). llvm::DenseSet EmittedAsPrivate; // Get list of private variables. 
- llvm::SmallVector PrivateVars; - llvm::SmallVector PrivateCopies; + OMPPrivateDataTy Data; + Data.Tied = Tied; for (const auto *C : S.getClausesOfKind()) { auto IRef = C->varlist_begin(); for (auto *IInit : C->private_copies()) { auto *OrigVD = cast(cast(*IRef)->getDecl()); if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { - PrivateVars.push_back(*IRef); - PrivateCopies.push_back(IInit); + Data.PrivateVars.push_back(*IRef); + Data.PrivateCopies.push_back(IInit); } ++IRef; } } EmittedAsPrivate.clear(); // Get list of firstprivate variables. - llvm::SmallVector FirstprivateVars; - llvm::SmallVector FirstprivateCopies; - llvm::SmallVector FirstprivateInits; for (const auto *C : S.getClausesOfKind()) { auto IRef = C->varlist_begin(); auto IElemInitRef = C->inits().begin(); for (auto *IInit : C->private_copies()) { auto *OrigVD = cast(cast(*IRef)->getDecl()); if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { - FirstprivateVars.push_back(*IRef); - FirstprivateCopies.push_back(IInit); - FirstprivateInits.push_back(*IElemInitRef); + Data.FirstprivateVars.push_back(*IRef); + Data.FirstprivateCopies.push_back(IInit); + Data.FirstprivateInits.push_back(*IElemInitRef); } ++IRef; ++IElemInitRef; } } // Build list of dependences. - llvm::SmallVector, 8> - Dependences; - for (const auto *C : S.getClausesOfKind()) { - for (auto *IRef : C->varlists()) { - Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); - } - } - auto &&CodeGen = [&S, &PrivateVars, &FirstprivateVars]( - CodeGenFunction &CGF, PrePostActionTy &Action) { - OMPPrivateScope Scope(CGF); + for (const auto *C : S.getClausesOfKind()) + for (auto *IRef : C->varlists()) + Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); + auto &&CodeGen = [PartId, &S, &Data, CS, &BodyGen](CodeGenFunction &CGF, + PrePostActionTy &Action) { // Set proper addresses for generated private copies. 
- auto *CS = cast(S.getAssociatedStmt()); - if (!PrivateVars.empty() || !FirstprivateVars.empty()) { + OMPPrivateScope Scope(CGF); + if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty()) { auto *CopyFn = CGF.Builder.CreateLoad( CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); auto *PrivatesPtr = CGF.Builder.CreateLoad( @@ -2345,14 +2339,14 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { llvm::SmallVector, 16> PrivatePtrs; llvm::SmallVector CallArgs; CallArgs.push_back(PrivatesPtr); - for (auto *E : PrivateVars) { + for (auto *E : Data.PrivateVars) { auto *VD = cast(cast(E)->getDecl()); Address PrivatePtr = CGF.CreateMemTemp( CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); CallArgs.push_back(PrivatePtr.getPointer()); } - for (auto *E : FirstprivateVars) { + for (auto *E : Data.FirstprivateVars) { auto *VD = cast(cast(E)->getDecl()); Address PrivatePtr = CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), @@ -2370,13 +2364,21 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { (void)Scope.Privatize(); Action.Enter(CGF); - CGF.EmitStmt(CS->getCapturedStmt()); + BodyGen(CGF); }; + auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( + S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, + Data.NumberOfParts); + OMPLexicalScope Scope(*this, S); + TaskGen(*this, OutlinedFn, Data); +} + +void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { + // Emit outlined function for task construct. + auto CS = cast(S.getAssociatedStmt()); + auto CapturedStruct = GenerateCapturedStmtArgument(*CS); // Check if we should emit tied or untied task. 
bool Tied = !S.getSingleClause(); - unsigned NumberOfParts; - auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( - S, *I, *PartId, *TaskT, OMPD_task, CodeGen, Tied, NumberOfParts); // Check if the task is final llvm::PointerIntPair Final; if (const auto *Clause = S.getSingleClause()) { @@ -2401,11 +2403,20 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { break; } } - OMPLexicalScope Scope(*this, S); - CGM.getOpenMPRuntime().emitTaskCall( - *this, S.getLocStart(), S, Tied, Final, NumberOfParts, OutlinedFn, - SharedsTy, CapturedStruct, IfCond, PrivateVars, PrivateCopies, - FirstprivateVars, FirstprivateCopies, FirstprivateInits, Dependences); + + auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitStmt(CS->getCapturedStmt()); + }; + auto &&TaskGen = [&S, &Final, SharedsTy, CapturedStruct, + IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, + const OMPPrivateDataTy &Data) { + CGF.CGM.getOpenMPRuntime().emitTaskCall( + CGF, S.getLocStart(), S, Data.Tied, Final, Data.NumberOfParts, + OutlinedFn, SharedsTy, CapturedStruct, IfCond, Data.PrivateVars, + Data.PrivateCopies, Data.FirstprivateVars, Data.FirstprivateCopies, + Data.FirstprivateInits, Data.Dependences); + }; + EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Tied); } void CodeGenFunction::EmitOMPTaskyieldDirective( @@ -3230,15 +3241,136 @@ void CodeGenFunction::EmitOMPTargetParallelForDirective( // TODO: codegen for target parallel for. } +/// Map a helper variable to the address of the corresponding parameter. 
+static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper, + const ImplicitParamDecl *PVD, + CodeGenFunction::OMPPrivateScope &Privates) { + auto *VDecl = cast(Helper->getDecl()); + Privates.addPrivate( + VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); }); +} + +void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { + assert(isOpenMPTaskLoopDirective(S.getDirectiveKind())); + // Emit outlined function for task construct. + auto CS = cast(S.getAssociatedStmt()); + auto CapturedStruct = GenerateCapturedStmtArgument(*CS); + auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); + const Expr *IfCond = nullptr; + for (const auto *C : S.getClausesOfKind()) { + if (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_taskloop) { + IfCond = C->getCondition(); + break; + } + } + bool Nogroup = S.getSingleClause(); + // TODO: Check if we should emit tied or untied task. + // Check if the task is final + llvm::PointerIntPair Final; + if (const auto *Clause = S.getSingleClause()) { + // If the condition constant folds and can be elided, try to avoid emitting + // the condition and the dead arm of the if/else. + auto *Cond = Clause->getCondition(); + bool CondConstant; + if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) + Final.setInt(CondConstant); + else + Final.setPointer(EvaluateExprAsBool(Cond)); + } else { + // By default the task is not final. + Final.setInt(/*IntVal=*/false); + } + + auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) { + // if (PreCond) { + // for (IV in 0..LastIteration) BODY; + // ; + // } + // + + // Emit: if (PreCond) - begin. + // If the condition constant folds and can be elided, avoid emitting the + // whole loop. 
+ bool CondConstant; + llvm::BasicBlock *ContBlock = nullptr; + OMPLoopScope PreInitScope(CGF, S); + if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { + if (!CondConstant) + return; + } else { + auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then"); + ContBlock = CGF.createBasicBlock("taskloop.if.end"); + emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, + CGF.getProfileCount(&S)); + CGF.EmitBlock(ThenBlock); + CGF.incrementProfileCounter(&S); + } + + OMPPrivateScope LoopScope(CGF); + // Emit helper vars inits. + enum { LowerBound = 5, UpperBound, Stride, LastIter }; + auto *I = CS->getCapturedDecl()->param_begin(); + auto *LBP = std::next(I, LowerBound); + auto *UBP = std::next(I, UpperBound); + auto *STP = std::next(I, Stride); + auto *LIP = std::next(I, LastIter); + mapParam(CGF, cast(S.getLowerBoundVariable()), *LBP, + LoopScope); + mapParam(CGF, cast(S.getUpperBoundVariable()), *UBP, + LoopScope); + mapParam(CGF, cast(S.getStrideVariable()), *STP, LoopScope); + mapParam(CGF, cast(S.getIsLastIterVariable()), *LIP, + LoopScope); + CGF.EmitOMPPrivateLoopCounters(S, LoopScope); + (void)LoopScope.Privatize(); + // Emit the loop iteration variable. + const Expr *IVExpr = S.getIterationVariable(); + const VarDecl *IVDecl = cast(cast(IVExpr)->getDecl()); + CGF.EmitVarDecl(*IVDecl); + CGF.EmitIgnoredExpr(S.getInit()); + + // Emit the iterations count variable. + // If it is not a variable, Sema decided to calculate iterations count on + // each iteration (e.g., it is foldable into a constant). + if (auto LIExpr = dyn_cast(S.getLastIteration())) { + CGF.EmitVarDecl(*cast(LIExpr->getDecl())); + // Emit calculation of the iterations count. + CGF.EmitIgnoredExpr(S.getCalcLastIteration()); + } + + CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), + S.getInc(), + [&S](CodeGenFunction &CGF) { + CGF.EmitOMPLoopBody(S, JumpDest()); + CGF.EmitStopPoint(&S); + }, + [](CodeGenFunction &) {}); + // Emit: if (PreCond) - end. 
+ if (ContBlock) { + CGF.EmitBranch(ContBlock); + CGF.EmitBlock(ContBlock, true); + } + }; + auto &&TaskGen = [&S, SharedsTy, CapturedStruct, IfCond, &Final, + Nogroup](CodeGenFunction &CGF, llvm::Value *OutlinedFn, + const OMPPrivateDataTy &Data) { + auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) { + OMPLoopScope PreInitScope(CGF, S); + CGF.CGM.getOpenMPRuntime().emitTaskLoopCall( + CGF, S.getLocStart(), S, Data.Tied, Final, Nogroup, + Data.NumberOfParts, OutlinedFn, SharedsTy, CapturedStruct, IfCond, + Data.PrivateVars, Data.PrivateCopies, Data.FirstprivateVars, + Data.FirstprivateCopies, Data.FirstprivateInits); + }; + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, + CodeGen); + }; + EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, /*Tied=*/true); +} + void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { - // emit the code inside the construct for now - OMPLexicalScope Scope(*this, S); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_taskloop, [&S](CodeGenFunction &CGF, PrePostActionTy &) { - OMPLoopScope PreInitScope(CGF, S); - CGF.EmitStmt( - cast(S.getAssociatedStmt())->getCapturedStmt()); - }); + EmitOMPTaskLoopBasedDirective(S); } void CodeGenFunction::EmitOMPTaskLoopSimdDirective( diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index 17b7fcaa25..148a9453f6 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -85,6 +85,7 @@ class BlockByrefHelpers; class BlockByrefInfo; class BlockFlags; class BlockFieldFlags; +class RegionCodeGenTy; class TargetCodeGenInfo; /// The kind of evaluation to perform on values of a particular @@ -2340,6 +2341,24 @@ public: /// \param D Directive (possibly) with the 'linear' clause. 
void EmitOMPLinearClauseInit(const OMPLoopDirective &D); + struct OMPPrivateDataTy { + bool Tied; + unsigned NumberOfParts; + SmallVector PrivateVars; + SmallVector PrivateCopies; + SmallVector FirstprivateVars; + SmallVector FirstprivateCopies; + SmallVector FirstprivateInits; + SmallVector, 4> Dependences; + }; + typedef const llvm::function_ref + TaskGenTy; + void EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, + const RegionCodeGenTy &BodyGen, + const TaskGenTy &TaskGen, bool Tied); + void EmitOMPParallelDirective(const OMPParallelDirective &S); void EmitOMPSimdDirective(const OMPSimdDirective &S); void EmitOMPForDirective(const OMPForDirective &S); @@ -2371,6 +2390,7 @@ public: void EmitOMPCancellationPointDirective(const OMPCancellationPointDirective &S); void EmitOMPCancelDirective(const OMPCancelDirective &S); + void EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S); void EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S); void EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective &S); void EmitOMPDistributeDirective(const OMPDistributeDirective &S); diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp index 97647c3f99..d2c08ca972 100644 --- a/lib/Sema/SemaOpenMP.cpp +++ b/lib/Sema/SemaOpenMP.cpp @@ -1675,11 +1675,37 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { break; } case OMPD_taskloop: { + QualType KmpInt32Ty = + Context.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); + QualType KmpUInt64Ty = + Context.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); + QualType KmpInt64Ty = + Context.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); + QualType Args[] = {Context.VoidPtrTy.withConst().withRestrict()}; + FunctionProtoType::ExtProtoInfo EPI; + EPI.Variadic = true; + QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI); Sema::CapturedParamNameType Params[] = { + std::make_pair(".global_tid.", KmpInt32Ty), + std::make_pair(".part_id.", 
Context.getPointerType(KmpInt32Ty)), + std::make_pair(".privates.", + Context.VoidPtrTy.withConst().withRestrict()), + std::make_pair( + ".copy_fn.", + Context.getPointerType(CopyFnType).withConst().withRestrict()), + std::make_pair(".task_t.", Context.VoidPtrTy.withConst()), + std::make_pair(".lb.", KmpUInt64Ty), + std::make_pair(".ub.", KmpUInt64Ty), std::make_pair(".st.", KmpInt64Ty), + std::make_pair(".liter.", KmpInt32Ty), std::make_pair(StringRef(), QualType()) // __context with shared vars }; ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, Params); + // Mark this captured region as inlined, because we don't use outlined + // function directly. + getCurCapturedRegion()->TheCapturedDecl->addAttr( + AlwaysInlineAttr::CreateImplicit( + Context, AlwaysInlineAttr::Keyword_forceinline, SourceRange())); break; } case OMPD_taskloop_simd: { @@ -4614,6 +4640,15 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, LastIteration32.get()->getType()->hasSignedIntegerRepresentation(), LastIteration64.get(), SemaRef))) LastIteration = LastIteration32; + QualType VType = LastIteration.get()->getType(); + QualType RealVType = VType; + QualType StrideVType = VType; + if (isOpenMPTaskLoopDirective(DKind)) { + VType = + SemaRef.Context.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); + StrideVType = + SemaRef.Context.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); + } if (!LastIteration.isUsable()) return 0; @@ -4649,7 +4684,6 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, SourceLocation InitLoc = IterSpaces[0].InitSrcRange.getBegin(); - QualType VType = LastIteration.get()->getType(); // Build variables passed into runtime, nesessary for worksharing directives. 
ExprResult LB, UB, IL, ST, EUB; if (isOpenMPWorksharingDirective(DKind) || isOpenMPTaskLoopDirective(DKind) || @@ -4678,8 +4712,9 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, /*DirectInit*/ false, /*TypeMayContainAuto*/ false); // Stride variable returned by runtime (we initialize it to 1 by default). - VarDecl *STDecl = buildVarDecl(SemaRef, InitLoc, VType, ".omp.stride"); - ST = buildDeclRefExpr(SemaRef, STDecl, VType, InitLoc); + VarDecl *STDecl = + buildVarDecl(SemaRef, InitLoc, StrideVType, ".omp.stride"); + ST = buildDeclRefExpr(SemaRef, STDecl, StrideVType, InitLoc); SemaRef.AddInitializerToDecl( STDecl, SemaRef.ActOnIntegerConstant(InitLoc, 1).get(), /*DirectInit*/ false, /*TypeMayContainAuto*/ false); @@ -4699,8 +4734,8 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, ExprResult IV; ExprResult Init; { - VarDecl *IVDecl = buildVarDecl(SemaRef, InitLoc, VType, ".omp.iv"); - IV = buildDeclRefExpr(SemaRef, IVDecl, VType, InitLoc); + VarDecl *IVDecl = buildVarDecl(SemaRef, InitLoc, RealVType, ".omp.iv"); + IV = buildDeclRefExpr(SemaRef, IVDecl, RealVType, InitLoc); Expr *RHS = (isOpenMPWorksharingDirective(DKind) || isOpenMPTaskLoopDirective(DKind) || isOpenMPDistributeDirective(DKind)) diff --git a/test/OpenMP/taskloop_codegen.cpp b/test/OpenMP/taskloop_codegen.cpp new file mode 100644 index 0000000000..f3ae1b1220 --- /dev/null +++ b/test/OpenMP/taskloop_codegen.cpp @@ -0,0 +1,198 @@ +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck %s +// expected-no-diagnostics +// REQUIRES: x86-registered-target +#ifndef HEADER +#define HEADER + +// CHECK-LABEL: @main +int main(int argc, char **argv) { +// CHECK: [[GTID:%.+]] 
= call i32 @__kmpc_global_thread_num(%ident_t* [[DEFLOC:@.+]]) +// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK1:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* +// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 +// CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3 +// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}} +// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4 +// CHECK: store i64 0, i64* [[DOWN]], +// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5 +// CHECK: store i64 9, i64* [[UP]], +// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6 +// CHECK: store i64 1, i64* [[ST]], +// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], +// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null) +#pragma omp taskloop + for (int i = 0; i < 10; ++i) + ; +// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK2:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* +// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 +// CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3 +// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}} +// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4 +// CHECK: store i64 0, i64* [[DOWN]], +// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* 
[[TASK_DATA]], i32 0, i32 5 +// CHECK: store i64 9, i64* [[UP]], +// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6 +// CHECK: store i64 1, i64* [[ST]], +// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], +// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 0, i64 0, i8* null) +#pragma omp taskloop nogroup + for (int i = 0; i < 10; ++i) + ; +// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* +// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 +// CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3 +// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}} +// CHECK: [[IF:%.+]] = icmp ne i32 %{{.+}}, 0 +// CHECK: [[IF_INT:%.+]] = sext i1 [[IF]] to i32 +// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4 +// CHECK: store i64 0, i64* [[DOWN]], +// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5 +// CHECK: store i64 %{{.+}}, i64* [[UP]], +// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6 +// CHECK: store i64 1, i64* [[ST]], +// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], +// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 [[IF_INT]], i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null) + int i; +#pragma omp taskloop if(argc) shared(argc, argv) collapse(2) + for (i = 0; i < argc; ++i) + for (int j = argc; j < argv[argc][argc]; ++j) + ; +} + +// CHECK: define internal i32 [[TASK1]]( +// CHECK: [[DOWN:%.+]] = getelementptr inbounds 
[[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4 +// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]], +// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5 +// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]], +// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6 +// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], +// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7 +// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]], +// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]], +// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]], +// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]], +// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]], +// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]], +// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32 +// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]], +// CHECK: br label +// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]], +// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64 +// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]], +// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]] +// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}} +// CHECK: load i32, i32* % +// CHECK: store i32 % +// CHECK: load i32, i32* % +// CHECK: add nsw i32 %{{.+}}, 1 +// CHECK: store i32 %{{.+}}, i32* % +// CHECK: br label % +// CHECK: ret i32 0 + +// CHECK: define internal i32 [[TASK2]]( +// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4 +// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]], +// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5 +// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]], +// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6 +// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], +// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7 +// CHECK: 
[[LITER_VAL:%.+]] = load i32, i32* [[LITER]], +// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]], +// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]], +// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]], +// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]], +// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]], +// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32 +// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]], +// CHECK: br label +// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]], +// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64 +// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]], +// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]] +// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}} +// CHECK: load i32, i32* % +// CHECK: store i32 % +// CHECK: load i32, i32* % +// CHECK: add nsw i32 %{{.+}}, 1 +// CHECK: store i32 %{{.+}}, i32* % +// CHECK: br label % +// CHECK: ret i32 0 + +// CHECK: define internal i32 [[TASK3]]( +// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4 +// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]], +// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5 +// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]], +// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6 +// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], +// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7 +// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]], +// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]], +// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]], +// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]], +// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]], +// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]], +// CHECK: store i64 [[LB_VAL]], i64* [[CNT:%.+]], +// CHECK: br label +// CHECK: ret i32 0 + +// CHECK-LABEL: @_ZN1SC2Ei +struct S { + int a; + S(int c) { +// CHECK: [[GTID:%.+]] = call i32 
@__kmpc_global_thread_num(%ident_t* [[DEFLOC:@.+]]) +// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK4:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* +// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 +// CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3 +// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}} +// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4 +// CHECK: store i64 0, i64* [[DOWN]], +// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5 +// CHECK: store i64 %{{.+}}, i64* [[UP]], +// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6 +// CHECK: store i64 1, i64* [[ST]], +// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], +// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null) +#pragma omp taskloop shared(c) + for (a = 0; a < c; ++a) + ; + } +} s(1); + +// CHECK: define internal i32 [[TASK4]]( +// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4 +// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]], +// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5 +// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]], +// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6 +// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], +// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7 +// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]], +// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]], +// CHECK: store i64 
[[UP_VAL]], i64* [[UB:%[^,]+]], +// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]], +// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]], +// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]], +// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32 +// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]], +// CHECK: br label +// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]], +// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64 +// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]], +// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]] +// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}} +// CHECK: load i32, i32* % +// CHECK: store i32 % +// CHECK: load i32, i32* % +// CHECK: add nsw i32 %{{.+}}, 1 +// CHECK: store i32 %{{.+}}, i32* % +// CHECK: br label % +// CHECK: ret i32 0 + +#endif -- 2.40.0