CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
break;
}
+ case OMPRTL__kmpc_omp_task_begin_if0: {
+ // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
+ // *new_task);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
+ CGM.VoidPtrTy};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn =
+ CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
+ break;
+ }
+ case OMPRTL__kmpc_omp_task_complete_if0: {
+ // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
+ // *new_task);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
+ CGM.VoidPtrTy};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy,
+ /*Name=*/"__kmpc_omp_task_complete_if0");
+ break;
+ }
case OMPRTL__kmpc_ordered: {
// Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
return nullptr;
}
-void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
- llvm::Value *OutlinedFn,
- llvm::Value *CapturedStruct) {
- // Build call __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/)
- llvm::Value *Args[] = {
- emitUpdateLocation(CGF, Loc),
- CGF.Builder.getInt32(1), // Number of arguments after 'microtask' argument
- // (there is only one additional argument - 'context')
- CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()),
- CGF.EmitCastToVoidPtr(CapturedStruct)};
- auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
- CGF.EmitRuntimeCall(RTLFn, Args);
-}
+/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
+/// function. Here is the logic:
+/// if (Cond) {
+/// ThenGen();
+/// } else {
+/// ElseGen();
+/// }
+static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
+ const RegionCodeGenTy &ThenGen,
+ const RegionCodeGenTy &ElseGen) {
+ CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
+
+ // If the condition constant folds and can be elided, try to avoid emitting
+ // the condition and the dead arm of the if/else.
+ bool CondConstant;
+ if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
+ CodeGenFunction::RunCleanupsScope Scope(CGF);
+ if (CondConstant) {
+ ThenGen(CGF);
+ } else {
+ ElseGen(CGF);
+ }
+ return;
+ }
-void CGOpenMPRuntime::emitSerialCall(CodeGenFunction &CGF, SourceLocation Loc,
- llvm::Value *OutlinedFn,
- llvm::Value *CapturedStruct) {
- auto ThreadID = getThreadID(CGF, Loc);
- // Build calls:
- // __kmpc_serialized_parallel(&Loc, GTid);
- llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), ThreadID};
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
- Args);
+ // Otherwise, the condition did not fold, or we couldn't elide it. Just
+ // emit the conditional branch.
+ auto ThenBlock = CGF.createBasicBlock("omp_if.then");
+ auto ElseBlock = CGF.createBasicBlock("omp_if.else");
+ auto ContBlock = CGF.createBasicBlock("omp_if.end");
+ CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
- // OutlinedFn(>id, &zero, CapturedStruct);
- auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
- auto Int32Ty =
- CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
- auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr");
- CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
- llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct};
- CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
+ // Emit the 'then' code.
+ CGF.EmitBlock(ThenBlock);
+ {
+ CodeGenFunction::RunCleanupsScope ThenScope(CGF);
+ ThenGen(CGF);
+ }
+ CGF.EmitBranch(ContBlock);
+ // Emit the 'else' code if present.
+ {
+ // There is no need to emit line number for unconditional branch.
+ auto NL = ApplyDebugLocation::CreateEmpty(CGF);
+ CGF.EmitBlock(ElseBlock);
+ }
+ {
+ CodeGenFunction::RunCleanupsScope ThenScope(CGF);
+ ElseGen(CGF);
+ }
+ {
+ // There is no need to emit line number for unconditional branch.
+ auto NL = ApplyDebugLocation::CreateEmpty(CGF);
+ CGF.EmitBranch(ContBlock);
+ }
+ // Emit the continuation block for code after the if.
+ CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
+}
- // __kmpc_end_serialized_parallel(&Loc, GTid);
- llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
- CGF.EmitRuntimeCall(
- createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
+void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
+ llvm::Value *OutlinedFn,
+ llvm::Value *CapturedStruct,
+ const Expr *IfCond) {
+ auto *RTLoc = emitUpdateLocation(CGF, Loc);
+ auto &&ThenGen =
+ [this, OutlinedFn, CapturedStruct, RTLoc](CodeGenFunction &CGF) {
+ // Build call __kmpc_fork_call(loc, 1, microtask,
+ // captured_struct/*context*/)
+ llvm::Value *Args[] = {
+ RTLoc,
+ CGF.Builder.getInt32(
+ 1), // Number of arguments after 'microtask' argument
+ // (there is only one additional argument - 'context')
+ CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()),
+ CGF.EmitCastToVoidPtr(CapturedStruct)};
+ auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
+ CGF.EmitRuntimeCall(RTLFn, Args);
+ };
+ auto &&ElseGen = [this, OutlinedFn, CapturedStruct, RTLoc, Loc](
+ CodeGenFunction &CGF) {
+ auto ThreadID = getThreadID(CGF, Loc);
+ // Build calls:
+ // __kmpc_serialized_parallel(&Loc, GTid);
+ llvm::Value *Args[] = {RTLoc, ThreadID};
+ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
+ Args);
+
+ // OutlinedFn(>id, &zero, CapturedStruct);
+ auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
+ auto Int32Ty = CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32,
+ /*Signed*/ true);
+ auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr");
+ CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+ llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct};
+ CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
+
+ // __kmpc_end_serialized_parallel(&Loc, GTid);
+ llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
+ CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
+ };
+ if (IfCond) {
+ emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
+ } else {
+ CodeGenFunction::RunCleanupsScope Scope(CGF);
+ ThenGen(CGF);
+ }
}
// If we're inside an (outlined) parallel region, use the region info's
void CGOpenMPRuntime::emitTaskCall(
CodeGenFunction &CGF, SourceLocation Loc, bool Tied,
llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
- llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds) {
+ llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds,
+ const Expr *IfCond) {
auto &C = CGM.getContext();
auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
// Build type kmp_routine_entry_t (if not built yet).
CGF.Builder.CreateStructGEP(KmpTaskTTy, NewTaskNewTaskTTy,
/*Idx=*/KmpTaskTDestructors),
CGM.PointerAlignInBytes);
-
// NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
// libcall.
// Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
// *new_task);
- llvm::Value *TaskArgs[] = {emitUpdateLocation(CGF, Loc),
- getThreadID(CGF, Loc), NewTask};
- // TODO: add check for untied tasks.
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
+ auto *ThreadID = getThreadID(CGF, Loc);
+ llvm::Value *TaskArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID, NewTask};
+ auto &&ThenCodeGen = [this, &TaskArgs](CodeGenFunction &CGF) {
+ // TODO: add check for untied tasks.
+ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
+ };
+ auto &&ElseCodeGen =
+ [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry](
+ CodeGenFunction &CGF) {
+ CodeGenFunction::RunCleanupsScope LocalScope(CGF);
+ CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs);
+ // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
+ // kmp_task_t *new_task);
+ CGF.EHStack.pushCleanup<CallEndCleanup>(
+ NormalAndEHCleanup,
+ createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
+ llvm::makeArrayRef(TaskArgs));
+
+ // Call proxy_task_entry(gtid, new_task);
+ llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
+ CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
+ };
+ if (IfCond) {
+ emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
+ } else {
+ CodeGenFunction::RunCleanupsScope Scope(CGF);
+ ThenCodeGen(CGF);
+ }
}
static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
// Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
// kmp_critical_name *lck);
OMPRTL__kmpc_end_reduce_nowait,
+ // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
+ // kmp_task_t * new_task);
+ OMPRTL__kmpc_omp_task_begin_if0,
+ // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
+ // kmp_task_t * new_task);
+ OMPRTL__kmpc_omp_task_complete_if0,
// Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
OMPRTL__kmpc_ordered,
// Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
///
void functionFinished(CodeGenFunction &CGF);
- /// \brief Emits code for parallel call of the \a OutlinedFn with variables
- /// captured in a record which address is stored in \a CapturedStruct.
+ /// \brief Emits code for parallel or serial call of the \a OutlinedFn with
+ /// variables captured in a record which address is stored in \a
+ /// CapturedStruct.
/// \param OutlinedFn Outlined function to be run in parallel threads. Type of
/// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
/// \param CapturedStruct A pointer to the record with the references to
/// variables used in \a OutlinedFn function.
+ /// \param IfCond Condition in the associated 'if' clause, if it was
+ /// specified, nullptr otherwise.
///
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
llvm::Value *OutlinedFn,
- llvm::Value *CapturedStruct);
-
- /// \brief Emits code for serial call of the \a OutlinedFn with variables
- /// captured in a record which address is stored in \a CapturedStruct.
- /// \param OutlinedFn Outlined function to be run in serial mode.
- /// \param CapturedStruct A pointer to the record with the references to
- /// variables used in \a OutlinedFn function.
- ///
- virtual void emitSerialCall(CodeGenFunction &CGF, SourceLocation Loc,
- llvm::Value *OutlinedFn,
- llvm::Value *CapturedStruct);
+ llvm::Value *CapturedStruct,
+ const Expr *IfCond);
/// \brief Emits a critical region.
/// \param CriticalName Name of the critical region.
/// \param SharedsTy A type which contains references the shared variables.
/// \param Shareds Context with the list of shared variables from the \a
/// TaskFunction.
+ /// \param IfCond Not a nullptr if 'if' clause was specified, nullptr
+ /// otherwise.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, bool Tied,
llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
llvm::Value *TaskFunction, QualType SharedsTy,
- llvm::Value *Shareds);
+ llvm::Value *Shareds, const Expr *IfCond);
/// \brief Emit code for the directive that does not require outlining.
///
//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//
-/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
-/// function. Here is the logic:
-/// if (Cond) {
-/// CodeGen(true);
-/// } else {
-/// CodeGen(false);
-/// }
-static void EmitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
- const std::function<void(bool)> &CodeGen) {
- CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
-
- // If the condition constant folds and can be elided, try to avoid emitting
- // the condition and the dead arm of the if/else.
- bool CondConstant;
- if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
- CodeGen(CondConstant);
- return;
- }
-
- // Otherwise, the condition did not fold, or we couldn't elide it. Just
- // emit the conditional branch.
- auto ThenBlock = CGF.createBasicBlock(/*name*/ "omp_if.then");
- auto ElseBlock = CGF.createBasicBlock(/*name*/ "omp_if.else");
- auto ContBlock = CGF.createBasicBlock(/*name*/ "omp_if.end");
- CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount*/ 0);
-
- // Emit the 'then' code.
- CGF.EmitBlock(ThenBlock);
- CodeGen(/*ThenBlock*/ true);
- CGF.EmitBranch(ContBlock);
- // Emit the 'else' code if present.
- {
- // There is no need to emit line number for unconditional branch.
- auto NL = ApplyDebugLocation::CreateEmpty(CGF);
- CGF.EmitBlock(ElseBlock);
- }
- CodeGen(/*ThenBlock*/ false);
- {
- // There is no need to emit line number for unconditional branch.
- auto NL = ApplyDebugLocation::CreateEmpty(CGF);
- CGF.EmitBranch(ContBlock);
- }
- // Emit the continuation block for code after the if.
- CGF.EmitBlock(ContBlock, /*IsFinished*/ true);
-}
-
void CodeGenFunction::EmitOMPAggregateAssign(
llvm::Value *DestAddr, llvm::Value *SrcAddr, QualType OriginalType,
const llvm::function_ref<void(llvm::Value *, llvm::Value *)> &CopyGen) {
}
}
-/// \brief Emits code for OpenMP parallel directive in the parallel region.
-static void emitOMPParallelCall(CodeGenFunction &CGF,
- const OMPExecutableDirective &S,
- llvm::Value *OutlinedFn,
- llvm::Value *CapturedStruct) {
- if (auto C = S.getSingleClause(/*K*/ OMPC_num_threads)) {
+static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
+ const OMPExecutableDirective &S,
+ const RegionCodeGenTy &CodeGen) {
+ auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
+ auto CapturedStruct = CGF.GenerateCapturedStmtArgument(*CS);
+ auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
+ S, *CS->getCapturedDecl()->param_begin(), CodeGen);
+ if (auto C = S.getSingleClause(OMPC_num_threads)) {
CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
auto NumThreadsClause = cast<OMPNumThreadsClause>(C);
auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
CGF, NumThreads, NumThreadsClause->getLocStart());
}
+ const Expr *IfCond = nullptr;
+ if (auto C = S.getSingleClause(OMPC_if)) {
+ IfCond = cast<OMPIfClause>(C)->getCondition();
+ }
CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
- CapturedStruct);
-}
-
-static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
- const OMPExecutableDirective &S,
- const RegionCodeGenTy &CodeGen) {
- auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
- auto CapturedStruct = CGF.GenerateCapturedStmtArgument(*CS);
- auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
- S, *CS->getCapturedDecl()->param_begin(), CodeGen);
- if (auto C = S.getSingleClause(/*K*/ OMPC_if)) {
- auto Cond = cast<OMPIfClause>(C)->getCondition();
- EmitOMPIfClause(CGF, Cond, [&](bool ThenBlock) {
- if (ThenBlock)
- emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct);
- else
- CGF.CGM.getOpenMPRuntime().emitSerialCall(CGF, S.getLocStart(),
- OutlinedFn, CapturedStruct);
- });
- } else
- emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct);
+ CapturedStruct, IfCond);
}
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
Final.setInt(/*IntVal=*/false);
}
auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
+ const Expr *IfCond = nullptr;
+ if (auto C = S.getSingleClause(OMPC_if)) {
+ IfCond = cast<OMPIfClause>(C)->getCondition();
+ }
CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), Tied, Final,
- OutlinedFn, SharedsTy, CapturedStruct);
+ OutlinedFn, SharedsTy, CapturedStruct,
+ IfCond);
}
void CodeGenFunction::EmitOMPTaskyieldDirective(
--- /dev/null
+// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix=CHECK %s
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+void fn1();
+void fn2();
+void fn3();
+void fn4();
+void fn5();
+void fn6();
+
+int Arg;
+
+// CHECK-LABEL: define void @{{.+}}gtid_test
+void gtid_test() {
+// CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 1, {{.+}}* [[GTID_TEST_REGION1:@.+]] to void
+#pragma omp parallel
+#pragma omp task if (false)
+ gtid_test();
+// CHECK: ret void
+}
+
+// CHECK: define internal void [[GTID_TEST_REGION1]](i32* [[GTID_PARAM:%.+]], i
+// CHECK: store i32* [[GTID_PARAM]], i32** [[GTID_ADDR_REF:%.+]],
+// CHECK: [[GTID_ADDR:%.+]] = load i32*, i32** [[GTID_ADDR_REF]]
+// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_ADDR]]
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(
+// CHECK: [[TASK_PTR:%.+]] = bitcast i8* [[ORIG_TASK_PTR]] to
+// CHECK: call void @__kmpc_omp_task_begin_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: call i32 [[GTID_TEST_REGION2:@.+]](i32 [[GTID]], %{{.+}}* [[TASK_PTR]])
+// CHECK: call void @__kmpc_omp_task_complete_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: ret void
+
+// CHECK: define internal i32 [[GTID_TEST_REGION2]](
+// CHECK: call void @{{.+}}gtid_test
+// CHECK: ret i32
+
+template <typename T>
+int tmain(T Arg) {
+#pragma omp task if (true)
+ fn1();
+#pragma omp task if (false)
+ fn2();
+#pragma omp task if (Arg)
+ fn3();
+ return 0;
+}
+
+// CHECK-LABEL: @main
+int main() {
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc({{[^,]+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[CAP_FN4:[^ ]+]] to i32 (i32, i8*)*))
+// CHECK: call i32 @__kmpc_omp_task(%{{.+}}* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
+#pragma omp task if (true)
+ fn4();
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(
+// CHECK: [[TASK_PTR:%.+]] = bitcast i8* [[ORIG_TASK_PTR]] to
+// CHECK: call void @__kmpc_omp_task_begin_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: call i32 [[CAP_FN5:@.+]](i32 [[GTID]], %{{.+}}* [[TASK_PTR]])
+// CHECK: call void @__kmpc_omp_task_complete_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+#pragma omp task if (false)
+ fn5();
+
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc({{[^,]+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[CAP_FN6:[^ ]+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_PTR:%.+]] = bitcast i8* [[ORIG_TASK_PTR]] to
+// CHECK: br i1 %{{.+}}, label %[[OMP_THEN:.+]], label %[[OMP_ELSE:.+]]
+// CHECK: [[OMP_THEN]]
+// CHECK: call i32 @__kmpc_omp_task(%{{.+}}* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: br label %[[OMP_END:.+]]
+// CHECK: [[OMP_ELSE]]
+// CHECK: call void @__kmpc_omp_task_begin_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: call i32 [[CAP_FN6:@.+]](i32 [[GTID]], %{{.+}}* [[TASK_PTR]])
+// CHECK: call void @__kmpc_omp_task_complete_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: br label %[[OMP_END]]
+// CHECK: [[OMP_END]]
+#pragma omp task if (Arg)
+ fn6();
+ // CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
+ return tmain(Arg);
+}
+
+// CHECK: define internal i32 [[CAP_FN4]]
+// CHECK: call void @{{.+}}fn4
+// CHECK: ret i32
+
+// CHECK: define internal i32 [[CAP_FN5]]
+// CHECK: call void @{{.+}}fn5
+// CHECK: ret i32
+
+// CHECK: define internal i32 [[CAP_FN6]]
+// CHECK: call void @{{.+}}fn6
+// CHECK: ret i32
+
+// CHECK-LABEL: define {{.+}} @{{.+}}tmain
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^,]+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[CAP_FN1:[^ ]+]] to i32 (i32, i8*)*))
+// CHECK: call i32 @__kmpc_omp_task(%{{.+}}* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
+
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(
+// CHECK: [[TASK_PTR:%.+]] = bitcast i8* [[ORIG_TASK_PTR]] to
+// CHECK: call void @__kmpc_omp_task_begin_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: call i32 [[CAP_FN2:@.+]](i32 [[GTID]], %{{.+}}* [[TASK_PTR]])
+// CHECK: call void @__kmpc_omp_task_complete_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^,]+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[CAP_FN3:[^ ]+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_PTR:%.+]] = bitcast i8* [[ORIG_TASK_PTR]] to
+// CHECK: br i1 %{{.+}}, label %[[OMP_THEN:.+]], label %[[OMP_ELSE:.+]]
+// CHECK: [[OMP_THEN]]
+// CHECK: call i32 @__kmpc_omp_task(%{{.+}}* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: br label %[[OMP_END:.+]]
+// CHECK: [[OMP_ELSE]]
+// CHECK: call void @__kmpc_omp_task_begin_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: call i32 [[CAP_FN3:@.+]](i32 [[GTID]], %{{.+}}* [[TASK_PTR]])
+// CHECK: call void @__kmpc_omp_task_complete_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: br label %[[OMP_END]]
+// CHECK: [[OMP_END]]
+
+// CHECK: define internal i32 [[CAP_FN1]]
+// CHECK: call void @{{.+}}fn1
+// CHECK: ret i32
+
+// CHECK: define internal i32 [[CAP_FN2]]
+// CHECK: call void @{{.+}}fn2
+// CHECK: ret i32
+
+// CHECK: define internal i32 [[CAP_FN3]]
+// CHECK: call void @{{.+}}fn3
+// CHECK: ret i32
+
+#endif