// Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
// kmp_int32 num_teams, kmp_int32 thread_limit);
OMPRTL__kmpc_push_num_teams,
- /// \brief Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc,
- /// kmpc_micro microtask, ...);
+ // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
+ // microtask, ...);
OMPRTL__kmpc_fork_teams,
+ // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
+ // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
+ // sched, kmp_uint64 grainsize, void *task_dup);
+ OMPRTL__kmpc_taskloop,
//
// Offloading related calls
assert(!ThreadIDVar->getType()->isPointerType() &&
"thread id variable must be of type kmp_int32 for tasks");
auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
+ auto *TD = dyn_cast<OMPTaskDirective>(&D);
CodeGenFunction CGF(CGM, true);
- CGOpenMPTaskOutlinedRegionInfo CGInfo(
- *CS, ThreadIDVar, CodeGen, InnermostKind,
- cast<OMPTaskDirective>(D).hasCancel(), Action);
+ CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
+ InnermostKind,
+ TD ? TD->hasCancel() : false, Action);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
if (!Tied)
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
break;
}
+ case OMPRTL__kmpc_taskloop: {
+ // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
+ // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
+ // sched, kmp_uint64 grainsize, void *task_dup);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
+ CGM.IntTy,
+ CGM.VoidPtrTy,
+ CGM.IntTy,
+ CGM.Int64Ty->getPointerTo(),
+ CGM.Int64Ty->getPointerTo(),
+ CGM.Int64Ty,
+ CGM.IntTy,
+ CGM.IntTy,
+ CGM.Int64Ty,
+ CGM.VoidPtrTy};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
+ break;
+ }
case OMPRTL__tgt_target: {
// Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
KmpTaskTPartId,
/// \brief Function with call of destructors for private variables.
KmpTaskTDestructors,
+ /// (Taskloops only) Lower bound.
+ KmpTaskTLowerBound,
+ /// (Taskloops only) Upper bound.
+ KmpTaskTUpperBound,
+ /// (Taskloops only) Stride.
+ KmpTaskTStride,
+ /// (Taskloops only) Is last iteration flag.
+ KmpTaskTLastIter,
};
} // anonymous namespace
}
static RecordDecl *
-createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
+createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
+ QualType KmpInt32Ty,
QualType KmpRoutineEntryPointerQTy) {
auto &C = CGM.getContext();
// Build struct kmp_task_t {
// kmp_routine_entry_t routine;
// kmp_int32 part_id;
// kmp_routine_entry_t destructors;
+ // For taskloops additional fields:
+ // kmp_uint64 lb;
+ // kmp_uint64 ub;
+ // kmp_int64 st;
+ // kmp_int32 liter;
// };
auto *RD = C.buildImplicitRecord("kmp_task_t");
RD->startDefinition();
addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
addFieldToRecordDecl(C, RD, KmpInt32Ty);
addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
+ if (isOpenMPTaskLoopDirective(Kind)) {
+ QualType KmpUInt64Ty =
+ CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
+ QualType KmpInt64Ty =
+ CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
+ addFieldToRecordDecl(C, RD, KmpUInt64Ty);
+ addFieldToRecordDecl(C, RD, KmpUInt64Ty);
+ addFieldToRecordDecl(C, RD, KmpInt64Ty);
+ addFieldToRecordDecl(C, RD, KmpInt32Ty);
+ }
RD->completeDefinition();
return RD;
}
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
+/// For taskloops:
+/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
/// tt->shareds);
/// return 0;
/// }
/// \endcode
static llvm::Value *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
- QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
+ OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
+ QualType KmpTaskTWithPrivatesPtrQTy,
QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
QualType SharedsPtrTy, llvm::Value *TaskFunction,
llvm::Value *TaskPrivatesMap) {
CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
// TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
- // tt, tt->task_data.shareds);
+ // tt,
+ // For taskloops:
+ // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
+ // tt->task_data.shareds);
auto *GtidParam = CGF.EmitLoadOfScalar(
CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
LValue TDBase = CGF.EmitLoadOfPointerLValue(
auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
PrivatesLVal.getPointer(), CGF.VoidPtrTy);
- } else {
+ } else
PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
- }
- llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
- TaskPrivatesMap,
- CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- TDBase.getAddress(), CGF.VoidPtrTy)
- .getPointer(),
- SharedsParam};
+ llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
+ TaskPrivatesMap,
+ CGF.Builder
+ .CreatePointerBitCastOrAddrSpaceCast(
+ TDBase.getAddress(), CGF.VoidPtrTy)
+ .getPointer()};
+ SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
+ std::end(CommonArgs));
+ if (isOpenMPTaskLoopDirective(Kind)) {
+ auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
+ auto LBLVal = CGF.EmitLValueForField(Base, *LBFI);
+ auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal();
+ auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
+ auto UBLVal = CGF.EmitLValueForField(Base, *UBFI);
+ auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal();
+ auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
+ auto StLVal = CGF.EmitLValueForField(Base, *StFI);
+ auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal();
+ auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
+ auto LILVal = CGF.EmitLValueForField(Base, *LIFI);
+ auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal();
+ CallArgs.push_back(LBParam);
+ CallArgs.push_back(UBParam);
+ CallArgs.push_back(StParam);
+ CallArgs.push_back(LIParam);
+ }
+ CallArgs.push_back(SharedsParam);
+
CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
CGF.EmitStoreThroughLValue(
RValue::get(CGF.Builder.getInt32(/*C=*/0)),
return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
}
-void CGOpenMPRuntime::emitTaskCall(
+CGOpenMPRuntime::TaskDataTy CGOpenMPRuntime::emitTaskInit(
CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy,
- Address Shareds, const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
+ Address Shareds, ArrayRef<const Expr *> PrivateVars,
ArrayRef<const Expr *> PrivateCopies,
ArrayRef<const Expr *> FirstprivateVars,
ArrayRef<const Expr *> FirstprivateCopies,
- ArrayRef<const Expr *> FirstprivateInits,
- ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) {
- if (!CGF.HaveInsertPoint())
- return;
+ ArrayRef<const Expr *> FirstprivateInits) {
auto &C = CGM.getContext();
- llvm::SmallVector<PrivateDataTy, 8> Privates;
+ llvm::SmallVector<PrivateDataTy, 4> Privates;
// Aggregate privates and sort them by the alignment.
auto I = PrivateCopies.begin();
for (auto *E : PrivateVars) {
emitKmpRoutineEntryT(KmpInt32Ty);
// Build type kmp_task_t (if not built yet).
if (KmpTaskTQTy.isNull()) {
- KmpTaskTQTy = C.getRecordType(
- createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy));
+ KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
+ CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
}
auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
// Build particular struct kmp_task_t for the given task.
// Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
// kmp_task_t *tt);
auto *TaskEntry = emitProxyTaskFunction(
- CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy,
- KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap);
+ CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
+ KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
+ TaskPrivatesMap);
// Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
DestructorFn, KmpRoutineEntryPtrTy),
Destructor);
+ TaskDataTy Data;
+ Data.NewTask = NewTask;
+ Data.TaskEntry = TaskEntry;
+ Data.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
+ Data.TDBase = TDBase;
+ Data.KmpTaskTQTyRD = KmpTaskTQTyRD;
+ return Data;
+}
+void CGOpenMPRuntime::emitTaskCall(
+ CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
+ bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
+ unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy,
+ Address Shareds, const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
+ ArrayRef<const Expr *> PrivateCopies,
+ ArrayRef<const Expr *> FirstprivateVars,
+ ArrayRef<const Expr *> FirstprivateCopies,
+ ArrayRef<const Expr *> FirstprivateInits,
+ ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) {
+ if (!CGF.HaveInsertPoint())
+ return;
+
+ TaskDataTy Data =
+ emitTaskInit(CGF, Loc, D, Tied, Final, NumberOfParts, TaskFunction,
+ SharedsTy, Shareds, PrivateVars, PrivateCopies,
+ FirstprivateVars, FirstprivateCopies, FirstprivateInits);
+ llvm::Value *NewTask = Data.NewTask;
+ llvm::Value *TaskEntry = Data.TaskEntry;
+ llvm::Value *NewTaskNewTaskTTy = Data.NewTaskNewTaskTTy;
+ LValue TDBase = Data.TDBase;
+ RecordDecl *KmpTaskTQTyRD = Data.KmpTaskTQTyRD;
+ auto &C = CGM.getContext();
// Process list of dependences.
Address DependenciesArray = Address::invalid();
unsigned NumDependencies = Dependences.size();
}
}
+void CGOpenMPRuntime::emitTaskLoopCall(
+ CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
+ bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final, bool Nogroup,
+ unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy,
+ Address Shareds, const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
+ ArrayRef<const Expr *> PrivateCopies,
+ ArrayRef<const Expr *> FirstprivateVars,
+ ArrayRef<const Expr *> FirstprivateCopies,
+ ArrayRef<const Expr *> FirstprivateInits) {
+ if (!CGF.HaveInsertPoint())
+ return;
+ TaskDataTy Data =
+ emitTaskInit(CGF, Loc, D, Tied, Final, NumberOfParts, TaskFunction,
+ SharedsTy, Shareds, PrivateVars, PrivateCopies,
+ FirstprivateVars, FirstprivateCopies, FirstprivateInits);
+ // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
+ // libcall.
+ // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
+ // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
+ // sched, kmp_uint64 grainsize, void *task_dup);
+ llvm::Value *ThreadID = getThreadID(CGF, Loc);
+ llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *IfVal;
+ if (IfCond) {
+ IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
+ /*isSigned=*/true);
+ } else
+ IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
+
+ LValue LBLVal = CGF.EmitLValueForField(
+ Data.TDBase,
+ *std::next(Data.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
+ auto *LBVar =
+ cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
+ CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
+ /*IsInitializer=*/true);
+ LValue UBLVal = CGF.EmitLValueForField(
+ Data.TDBase,
+ *std::next(Data.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
+ auto *UBVar =
+ cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
+ CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
+ /*IsInitializer=*/true);
+ LValue StLVal = CGF.EmitLValueForField(
+ Data.TDBase,
+ *std::next(Data.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
+ auto *StVar =
+ cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
+ CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
+ /*IsInitializer=*/true);
+ llvm::Value *TaskArgs[] = {
+ UpLoc,
+ ThreadID,
+ Data.NewTask,
+ IfVal,
+ LBLVal.getPointer(),
+ UBLVal.getPointer(),
+ CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
+ llvm::ConstantInt::getSigned(CGF.IntTy, Nogroup ? 1 : 0),
+ llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/0),
+ llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
+ llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
+ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
+}
+
/// \brief Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
+#include "CGValue.h"
#include "clang/AST/Type.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceLocation.h"
class Expr;
class GlobalDecl;
class OMPExecutableDirective;
+class OMPLoopDirective;
class VarDecl;
class OMPDeclareReductionDecl;
class IdentifierInfo;
///
llvm::Value *getCriticalRegionLock(StringRef CriticalName);
+ struct TaskDataTy {
+ llvm::Value *NewTask;
+ llvm::Value *TaskEntry;
+ llvm::Value *NewTaskNewTaskTTy;
+ LValue TDBase;
+ RecordDecl *KmpTaskTQTyRD;
+ };
+ /// Emit task region for the task directive. The task region is emitted in
+ /// several steps:
+ /// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32
+ /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+ /// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the
+ /// function:
+ /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
+ /// TaskFunction(gtid, tt->part_id, tt->shareds);
+ /// return 0;
+ /// }
+ /// 2. Copy a list of shared variables to field shareds of the resulting
+ /// structure kmp_task_t returned by the previous call (if any).
+ /// 3. Copy a pointer to destructions function to field destructions of the
+ /// resulting structure kmp_task_t.
+ /// \param D Current task directive.
+ /// \param Tied true if the task is tied (the task is tied to the thread that
+ /// can suspend its task region), false - untied (the task is not tied to any
+ /// thread).
+ /// \param Final Contains either constant bool value, or llvm::Value * of i1
+ /// type for final clause. If the value is true, the task forces all of its
+ /// child tasks to become final and included tasks.
+ /// \param NumberOfParts Number of parts in untied tasks.
+ /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32
+ /// /*part_id*/, captured_struct */*__context*/);
+ /// \param SharedsTy A type which contains references the shared variables.
+ /// \param Shareds Context with the list of shared variables from the \p
+ /// TaskFunction.
+ /// \param IfCond Not a nullptr if 'if' clause was specified, nullptr
+ /// otherwise.
+ /// \param PrivateVars List of references to private variables for the task
+ /// directive.
+ /// \param PrivateCopies List of private copies for each private variable in
+ /// \p PrivateVars.
+ /// \param FirstprivateVars List of references to private variables for the
+ /// task directive.
+ /// \param FirstprivateCopies List of private copies for each private variable
+ /// in \p FirstprivateVars.
+ /// \param FirstprivateInits List of references to auto generated variables
+ /// used for initialization of a single array element. Used if firstprivate
+ /// variable is of array type.
+ TaskDataTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
+ const OMPExecutableDirective &D, bool Tied,
+ llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
+ unsigned NumberOfParts, llvm::Value *TaskFunction,
+ QualType SharedsTy, Address Shareds,
+ ArrayRef<const Expr *> PrivateVars,
+ ArrayRef<const Expr *> PrivateCopies,
+ ArrayRef<const Expr *> FirstprivateVars,
+ ArrayRef<const Expr *> FirstprivateCopies,
+ ArrayRef<const Expr *> FirstprivateInits);
+
public:
explicit CGOpenMPRuntime(CodeGenModule &CGM);
virtual ~CGOpenMPRuntime() {}
ArrayRef<const Expr *> FirstprivateInits,
ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences);
+ /// Emit task region for the taskloop directive. The taskloop region is
+ /// emitted in several steps:
+ /// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32
+ /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+ /// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the
+ /// function:
+ /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
+ /// TaskFunction(gtid, tt->part_id, tt->shareds);
+ /// return 0;
+ /// }
+ /// 2. Copy a list of shared variables to field shareds of the resulting
+ /// structure kmp_task_t returned by the previous call (if any).
+ /// 3. Copy a pointer to destructions function to field destructions of the
+ /// resulting structure kmp_task_t.
+ /// 4. Emit a call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t
+ /// *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int
+ /// nogroup, int sched, kmp_uint64 grainsize, void *task_dup ), where new_task
+ /// is a resulting structure from
+ /// previous items.
+ /// \param D Current task directive.
+ /// \param Tied true if the task is tied (the task is tied to the thread that
+ /// can suspend its task region), false - untied (the task is not tied to any
+ /// thread).
+ /// \param Final Contains either constant bool value, or llvm::Value * of i1
+ /// type for final clause. If the value is true, the task forces all of its
+ /// child tasks to become final and included tasks.
+ /// \param Nogroup true if nogroup clause was specified, false otherwise.
+ /// \param NumberOfParts Number of parts in untied taskloops.
+ /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32
+ /// /*part_id*/, captured_struct */*__context*/);
+ /// \param SharedsTy A type which contains references the shared variables.
+ /// \param Shareds Context with the list of shared variables from the \p
+ /// TaskFunction.
+ /// \param IfCond Not a nullptr if 'if' clause was specified, nullptr
+ /// otherwise.
+ /// \param PrivateVars List of references to private variables for the task
+ /// directive.
+ /// \param PrivateCopies List of private copies for each private variable in
+ /// \p PrivateVars.
+ /// \param FirstprivateVars List of references to private variables for the
+ /// task directive.
+ /// \param FirstprivateCopies List of private copies for each private variable
+ /// in \p FirstprivateVars.
+ /// \param FirstprivateInits List of references to auto generated variables
+ /// used for initialization of a single array element. Used if firstprivate
+ /// variable is of array type.
+ virtual void emitTaskLoopCall(
+ CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
+ bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
+ bool Nogroup, unsigned NumberOfParts, llvm::Value *TaskFunction,
+ QualType SharedsTy, Address Shareds, const Expr *IfCond,
+ ArrayRef<const Expr *> PrivateVars, ArrayRef<const Expr *> PrivateCopies,
+ ArrayRef<const Expr *> FirstprivateVars,
+ ArrayRef<const Expr *> FirstprivateCopies,
+ ArrayRef<const Expr *> FirstprivateInits);
+
/// \brief Emit code for the directive that does not require outlining.
///
/// \param InnermostKind Kind of innermost directive (for simple directives it
// IV < UB
BoolCondVal = EvaluateExprAsBool(S.getCond());
} else {
- BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
- IL, LB, UB, ST);
+ BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL,
+ LB, UB, ST);
}
// If there are any cleanups between here and the loop-exit scope,
emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen);
}
-void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
+void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
+ const RegionCodeGenTy &BodyGen,
+ const TaskGenTy &TaskGen,
+ bool Tied) {
// Emit outlined function for task construct.
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
- auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
auto *I = CS->getCapturedDecl()->param_begin();
auto *PartId = std::next(I);
auto *TaskT = std::next(I, 4);
// part id (0 for tied tasks, >=0 for untied task).
llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
// Get list of private variables.
- llvm::SmallVector<const Expr *, 8> PrivateVars;
- llvm::SmallVector<const Expr *, 8> PrivateCopies;
+ OMPPrivateDataTy Data;
+ Data.Tied = Tied;
for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
auto IRef = C->varlist_begin();
for (auto *IInit : C->private_copies()) {
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
- PrivateVars.push_back(*IRef);
- PrivateCopies.push_back(IInit);
+ Data.PrivateVars.push_back(*IRef);
+ Data.PrivateCopies.push_back(IInit);
}
++IRef;
}
}
EmittedAsPrivate.clear();
// Get list of firstprivate variables.
- llvm::SmallVector<const Expr *, 8> FirstprivateVars;
- llvm::SmallVector<const Expr *, 8> FirstprivateCopies;
- llvm::SmallVector<const Expr *, 8> FirstprivateInits;
for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
auto IRef = C->varlist_begin();
auto IElemInitRef = C->inits().begin();
for (auto *IInit : C->private_copies()) {
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
- FirstprivateVars.push_back(*IRef);
- FirstprivateCopies.push_back(IInit);
- FirstprivateInits.push_back(*IElemInitRef);
+ Data.FirstprivateVars.push_back(*IRef);
+ Data.FirstprivateCopies.push_back(IInit);
+ Data.FirstprivateInits.push_back(*IElemInitRef);
}
++IRef;
++IElemInitRef;
}
}
// Build list of dependences.
- llvm::SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 8>
- Dependences;
- for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
- for (auto *IRef : C->varlists()) {
- Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
- }
- }
- auto &&CodeGen = [&S, &PrivateVars, &FirstprivateVars](
- CodeGenFunction &CGF, PrePostActionTy &Action) {
- OMPPrivateScope Scope(CGF);
+ for (const auto *C : S.getClausesOfKind<OMPDependClause>())
+ for (auto *IRef : C->varlists())
+ Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
+ auto &&CodeGen = [PartId, &S, &Data, CS, &BodyGen](CodeGenFunction &CGF,
+ PrePostActionTy &Action) {
// Set proper addresses for generated private copies.
- auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
- if (!PrivateVars.empty() || !FirstprivateVars.empty()) {
+ OMPPrivateScope Scope(CGF);
+ if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty()) {
auto *CopyFn = CGF.Builder.CreateLoad(
CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
auto *PrivatesPtr = CGF.Builder.CreateLoad(
llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
llvm::SmallVector<llvm::Value *, 16> CallArgs;
CallArgs.push_back(PrivatesPtr);
- for (auto *E : PrivateVars) {
+ for (auto *E : Data.PrivateVars) {
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
Address PrivatePtr = CGF.CreateMemTemp(
CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
CallArgs.push_back(PrivatePtr.getPointer());
}
- for (auto *E : FirstprivateVars) {
+ for (auto *E : Data.FirstprivateVars) {
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
Address PrivatePtr =
CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
(void)Scope.Privatize();
Action.Enter(CGF);
- CGF.EmitStmt(CS->getCapturedStmt());
+ BodyGen(CGF);
};
+ auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
+ S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
+ Data.NumberOfParts);
+ OMPLexicalScope Scope(*this, S);
+ TaskGen(*this, OutlinedFn, Data);
+}
+
+void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
+ // Emit outlined function for task construct.
+ auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
+ auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
// Check if we should emit tied or untied task.
bool Tied = !S.getSingleClause<OMPUntiedClause>();
- unsigned NumberOfParts;
- auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
- S, *I, *PartId, *TaskT, OMPD_task, CodeGen, Tied, NumberOfParts);
// Check if the task is final
llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
break;
}
}
- OMPLexicalScope Scope(*this, S);
- CGM.getOpenMPRuntime().emitTaskCall(
- *this, S.getLocStart(), S, Tied, Final, NumberOfParts, OutlinedFn,
- SharedsTy, CapturedStruct, IfCond, PrivateVars, PrivateCopies,
- FirstprivateVars, FirstprivateCopies, FirstprivateInits, Dependences);
+
+ auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
+ CGF.EmitStmt(CS->getCapturedStmt());
+ };
+ auto &&TaskGen = [&S, &Final, SharedsTy, CapturedStruct,
+ IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
+ const OMPPrivateDataTy &Data) {
+ CGF.CGM.getOpenMPRuntime().emitTaskCall(
+ CGF, S.getLocStart(), S, Data.Tied, Final, Data.NumberOfParts,
+ OutlinedFn, SharedsTy, CapturedStruct, IfCond, Data.PrivateVars,
+ Data.PrivateCopies, Data.FirstprivateVars, Data.FirstprivateCopies,
+ Data.FirstprivateInits, Data.Dependences);
+ };
+ EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Tied);
}
void CodeGenFunction::EmitOMPTaskyieldDirective(
// TODO: codegen for target parallel for.
}
+/// Emit a helper variable and return corresponding lvalue.
+static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
+ const ImplicitParamDecl *PVD,
+ CodeGenFunction::OMPPrivateScope &Privates) {
+ auto *VDecl = cast<VarDecl>(Helper->getDecl());
+ Privates.addPrivate(
+ VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
+}
+
+void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
+ assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
+ // Emit outlined function for task construct.
+ auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
+ auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
+ auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
+ const Expr *IfCond = nullptr;
+ for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
+ if (C->getNameModifier() == OMPD_unknown ||
+ C->getNameModifier() == OMPD_taskloop) {
+ IfCond = C->getCondition();
+ break;
+ }
+ }
+ bool Nogroup = S.getSingleClause<OMPNogroupClause>();
+ // TODO: Check if we should emit tied or untied task.
+ // Check if the task is final
+ llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
+ if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
+ // If the condition constant folds and can be elided, try to avoid emitting
+ // the condition and the dead arm of the if/else.
+ auto *Cond = Clause->getCondition();
+ bool CondConstant;
+ if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
+ Final.setInt(CondConstant);
+ else
+ Final.setPointer(EvaluateExprAsBool(Cond));
+ } else {
+ // By default the task is not final.
+ Final.setInt(/*IntVal=*/false);
+ }
+
+ auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
+ // if (PreCond) {
+ // for (IV in 0..LastIteration) BODY;
+ // <Final counter/linear vars updates>;
+ // }
+ //
+
+ // Emit: if (PreCond) - begin.
+ // If the condition constant folds and can be elided, avoid emitting the
+ // whole loop.
+ bool CondConstant;
+ llvm::BasicBlock *ContBlock = nullptr;
+ OMPLoopScope PreInitScope(CGF, S);
+ if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
+ if (!CondConstant)
+ return;
+ } else {
+ auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
+ ContBlock = CGF.createBasicBlock("taskloop.if.end");
+ emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
+ CGF.getProfileCount(&S));
+ CGF.EmitBlock(ThenBlock);
+ CGF.incrementProfileCounter(&S);
+ }
+
+ OMPPrivateScope LoopScope(CGF);
+ // Emit helper vars inits.
+ enum { LowerBound = 5, UpperBound, Stride, LastIter };
+ auto *I = CS->getCapturedDecl()->param_begin();
+ auto *LBP = std::next(I, LowerBound);
+ auto *UBP = std::next(I, UpperBound);
+ auto *STP = std::next(I, Stride);
+ auto *LIP = std::next(I, LastIter);
+ mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
+ LoopScope);
+ mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
+ LoopScope);
+ mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
+ mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
+ LoopScope);
+ CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
+ (void)LoopScope.Privatize();
+ // Emit the loop iteration variable.
+ const Expr *IVExpr = S.getIterationVariable();
+ const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
+ CGF.EmitVarDecl(*IVDecl);
+ CGF.EmitIgnoredExpr(S.getInit());
+
+ // Emit the iterations count variable.
+ // If it is not a variable, Sema decided to calculate iterations count on
+ // each iteration (e.g., it is foldable into a constant).
+ if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
+ CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
+ // Emit calculation of the iterations count.
+ CGF.EmitIgnoredExpr(S.getCalcLastIteration());
+ }
+
+ CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
+ S.getInc(),
+ [&S](CodeGenFunction &CGF) {
+ CGF.EmitOMPLoopBody(S, JumpDest());
+ CGF.EmitStopPoint(&S);
+ },
+ [](CodeGenFunction &) {});
+ // Emit: if (PreCond) - end.
+ if (ContBlock) {
+ CGF.EmitBranch(ContBlock);
+ CGF.EmitBlock(ContBlock, true);
+ }
+ };
+ auto &&TaskGen = [&S, SharedsTy, CapturedStruct, IfCond, &Final,
+ Nogroup](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
+ const OMPPrivateDataTy &Data) {
+ auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
+ OMPLoopScope PreInitScope(CGF, S);
+ CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(
+ CGF, S.getLocStart(), S, Data.Tied, Final, Nogroup,
+ Data.NumberOfParts, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
+ Data.PrivateVars, Data.PrivateCopies, Data.FirstprivateVars,
+ Data.FirstprivateCopies, Data.FirstprivateInits);
+ };
+ CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
+ CodeGen);
+ };
+ EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, /*Tied=*/true);
+}
+
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
- // emit the code inside the construct for now
- OMPLexicalScope Scope(*this, S);
- CGM.getOpenMPRuntime().emitInlinedDirective(
- *this, OMPD_taskloop, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
- OMPLoopScope PreInitScope(CGF, S);
- CGF.EmitStmt(
- cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
- });
+ EmitOMPTaskLoopBasedDirective(S);
}
void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
class BlockByrefInfo;
class BlockFlags;
class BlockFieldFlags;
+class RegionCodeGenTy;
class TargetCodeGenInfo;
/// The kind of evaluation to perform on values of a particular
/// \param D Directive (possibly) with the 'linear' clause.
void EmitOMPLinearClauseInit(const OMPLoopDirective &D);
+ struct OMPPrivateDataTy {
+ bool Tied;
+ unsigned NumberOfParts;
+ SmallVector<const Expr *, 4> PrivateVars;
+ SmallVector<const Expr *, 4> PrivateCopies;
+ SmallVector<const Expr *, 4> FirstprivateVars;
+ SmallVector<const Expr *, 4> FirstprivateCopies;
+ SmallVector<const Expr *, 4> FirstprivateInits;
+ SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 4> Dependences;
+ };
+ typedef const llvm::function_ref<void(CodeGenFunction & /*CGF*/,
+ llvm::Value * /*OutlinedFn*/,
+ const OMPPrivateDataTy & /*Data*/)>
+ TaskGenTy;
+ void EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
+ const RegionCodeGenTy &BodyGen,
+ const TaskGenTy &TaskGen, bool Tied);
+
void EmitOMPParallelDirective(const OMPParallelDirective &S);
void EmitOMPSimdDirective(const OMPSimdDirective &S);
void EmitOMPForDirective(const OMPForDirective &S);
void
EmitOMPCancellationPointDirective(const OMPCancellationPointDirective &S);
void EmitOMPCancelDirective(const OMPCancelDirective &S);
+ void EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S);
void EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S);
void EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective &S);
void EmitOMPDistributeDirective(const OMPDistributeDirective &S);
break;
}
case OMPD_taskloop: {
+ QualType KmpInt32Ty =
+ Context.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
+ QualType KmpUInt64Ty =
+ Context.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
+ QualType KmpInt64Ty =
+ Context.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
+ QualType Args[] = {Context.VoidPtrTy.withConst().withRestrict()};
+ FunctionProtoType::ExtProtoInfo EPI;
+ EPI.Variadic = true;
+ QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI);
Sema::CapturedParamNameType Params[] = {
+ std::make_pair(".global_tid.", KmpInt32Ty),
+ std::make_pair(".part_id.", Context.getPointerType(KmpInt32Ty)),
+ std::make_pair(".privates.",
+ Context.VoidPtrTy.withConst().withRestrict()),
+ std::make_pair(
+ ".copy_fn.",
+ Context.getPointerType(CopyFnType).withConst().withRestrict()),
+ std::make_pair(".task_t.", Context.VoidPtrTy.withConst()),
+ std::make_pair(".lb.", KmpUInt64Ty),
+ std::make_pair(".ub.", KmpUInt64Ty), std::make_pair(".st.", KmpInt64Ty),
+ std::make_pair(".liter.", KmpInt32Ty),
std::make_pair(StringRef(), QualType()) // __context with shared vars
};
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
Params);
+ // Mark this captured region as inlined, because we don't use outlined
+ // function directly.
+ getCurCapturedRegion()->TheCapturedDecl->addAttr(
+ AlwaysInlineAttr::CreateImplicit(
+ Context, AlwaysInlineAttr::Keyword_forceinline, SourceRange()));
break;
}
case OMPD_taskloop_simd: {
LastIteration32.get()->getType()->hasSignedIntegerRepresentation(),
LastIteration64.get(), SemaRef)))
LastIteration = LastIteration32;
+ QualType VType = LastIteration.get()->getType();
+ QualType RealVType = VType;
+ QualType StrideVType = VType;
+ if (isOpenMPTaskLoopDirective(DKind)) {
+ VType =
+ SemaRef.Context.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
+ StrideVType =
+ SemaRef.Context.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
+ }
if (!LastIteration.isUsable())
return 0;
SourceLocation InitLoc = IterSpaces[0].InitSrcRange.getBegin();
- QualType VType = LastIteration.get()->getType();
// Build variables passed into runtime, nesessary for worksharing directives.
ExprResult LB, UB, IL, ST, EUB;
if (isOpenMPWorksharingDirective(DKind) || isOpenMPTaskLoopDirective(DKind) ||
/*DirectInit*/ false, /*TypeMayContainAuto*/ false);
// Stride variable returned by runtime (we initialize it to 1 by default).
- VarDecl *STDecl = buildVarDecl(SemaRef, InitLoc, VType, ".omp.stride");
- ST = buildDeclRefExpr(SemaRef, STDecl, VType, InitLoc);
+ VarDecl *STDecl =
+ buildVarDecl(SemaRef, InitLoc, StrideVType, ".omp.stride");
+ ST = buildDeclRefExpr(SemaRef, STDecl, StrideVType, InitLoc);
SemaRef.AddInitializerToDecl(
STDecl, SemaRef.ActOnIntegerConstant(InitLoc, 1).get(),
/*DirectInit*/ false, /*TypeMayContainAuto*/ false);
ExprResult IV;
ExprResult Init;
{
- VarDecl *IVDecl = buildVarDecl(SemaRef, InitLoc, VType, ".omp.iv");
- IV = buildDeclRefExpr(SemaRef, IVDecl, VType, InitLoc);
+ VarDecl *IVDecl = buildVarDecl(SemaRef, InitLoc, RealVType, ".omp.iv");
+ IV = buildDeclRefExpr(SemaRef, IVDecl, RealVType, InitLoc);
Expr *RHS = (isOpenMPWorksharingDirective(DKind) ||
isOpenMPTaskLoopDirective(DKind) ||
isOpenMPDistributeDirective(DKind))
--- /dev/null
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck %s
+// expected-no-diagnostics
+// REQUIRES: x86-registered-target
+#ifndef HEADER
+#define HEADER
+
+// CHECK-LABEL: @main
+int main(int argc, char **argv) {
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* [[DEFLOC:@.+]])
+// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK1:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
+// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
+// CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}}
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4
+// CHECK: store i64 0, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
+// CHECK: store i64 9, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
+// CHECK: store i64 1, i64* [[ST]],
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null)
+#pragma omp taskloop
+ for (int i = 0; i < 10; ++i)
+ ;
+// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK2:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
+// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
+// CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}}
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4
+// CHECK: store i64 0, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
+// CHECK: store i64 9, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
+// CHECK: store i64 1, i64* [[ST]],
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 0, i64 0, i8* null)
+#pragma omp taskloop nogroup
+ for (int i = 0; i < 10; ++i)
+ ;
+// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
+// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
+// CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}}
+// CHECK: [[IF:%.+]] = icmp ne i32 %{{.+}}, 0
+// CHECK: [[IF_INT:%.+]] = sext i1 [[IF]] to i32
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4
+// CHECK: store i64 0, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
+// CHECK: store i64 %{{.+}}, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
+// CHECK: store i64 1, i64* [[ST]],
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 [[IF_INT]], i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null)
+ int i;
+#pragma omp taskloop if(argc) shared(argc, argv) collapse(2)
+ for (i = 0; i < argc; ++i)
+ for (int j = argc; j < argv[argc][argc]; ++j)
+ ;
+}
+
+// CHECK: define internal i32 [[TASK1]](
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4
+// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
+// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
+// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
+// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
+// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
+// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
+// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
+// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
+// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
+// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
+// CHECK: br label
+// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],
+// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
+// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],
+// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
+// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
+// CHECK: load i32, i32* %
+// CHECK: store i32 %
+// CHECK: load i32, i32* %
+// CHECK: add nsw i32 %{{.+}}, 1
+// CHECK: store i32 %{{.+}}, i32* %
+// CHECK: br label %
+// CHECK: ret i32 0
+
+// CHECK: define internal i32 [[TASK2]](
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4
+// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
+// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
+// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
+// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
+// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
+// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
+// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
+// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
+// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
+// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
+// CHECK: br label
+// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],
+// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
+// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],
+// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
+// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
+// CHECK: load i32, i32* %
+// CHECK: store i32 %
+// CHECK: load i32, i32* %
+// CHECK: add nsw i32 %{{.+}}, 1
+// CHECK: store i32 %{{.+}}, i32* %
+// CHECK: br label %
+// CHECK: ret i32 0
+
+// CHECK: define internal i32 [[TASK3]](
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4
+// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
+// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
+// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
+// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
+// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
+// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
+// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
+// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
+// CHECK: store i64 [[LB_VAL]], i64* [[CNT:%.+]],
+// CHECK: br label
+// CHECK: ret i32 0
+
+// CHECK-LABEL: @_ZN1SC2Ei
+struct S {
+ int a;
+ S(int c) {
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* [[DEFLOC:@.+]])
+// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK4:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
+// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
+// CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}}
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4
+// CHECK: store i64 0, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
+// CHECK: store i64 %{{.+}}, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
+// CHECK: store i64 1, i64* [[ST]],
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null)
+#pragma omp taskloop shared(c)
+ for (a = 0; a < c; ++a)
+ ;
+ }
+} s(1);
+
+// CHECK: define internal i32 [[TASK4]](
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4
+// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
+// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
+// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
+// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
+// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
+// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
+// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
+// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
+// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
+// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
+// CHECK: br label
+// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],
+// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
+// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],
+// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
+// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
+// CHECK: load i32, i32* %
+// CHECK: store i32 %
+// CHECK: load i32, i32* %
+// CHECK: add nsw i32 %{{.+}}, 1
+// CHECK: store i32 %{{.+}}, i32* %
+// CHECK: br label %
+// CHECK: ret i32 0
+
+#endif