From 9a439088f2879ea18b67cbf3c583ab7c8928a521 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 12 Jul 2017 18:09:32 +0000 Subject: [PATCH] [OPENMP] Emit implicit taskgroup block around taskloop directives. If taskloop directive has no associated nogroup clause, it must emitted inside implicit taskgroup block. Runtime supports it, but we need to generate implicit taskgroup block explicitly to support future reductions codegen. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@307822 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CGOpenMPRuntime.cpp | 19 ++++++++++++------- lib/CodeGen/CGStmtOpenMP.cpp | 13 ++++++++++++- test/OpenMP/taskloop_codegen.cpp | 6 +++++- test/OpenMP/taskloop_simd_codegen.cpp | 6 +++++- 4 files changed, 34 insertions(+), 10 deletions(-) diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 3df95a4e9b..a2ea0dec3e 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -4157,9 +4157,15 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, /*IsInitializer=*/true); enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; llvm::Value *TaskArgs[] = { - UpLoc, ThreadID, Result.NewTask, IfVal, LBLVal.getPointer(), - UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()), - llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0), + UpLoc, + ThreadID, + Result.NewTask, + IfVal, + LBLVal.getPointer(), + UBLVal.getPointer(), + CGF.EmitLoadOfScalar(StLVal, SourceLocation()), + llvm::ConstantInt::getNullValue( + CGF.IntTy), // Always 0 because taskgroup emitted by the compiler llvm::ConstantInt::getSigned( CGF.IntTy, Data.Schedule.getPointer() ? Data.Schedule.getInt() ? NumTasks : Grainsize @@ -4168,10 +4174,9 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, /*isSigned=*/false) : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), - Result.TaskDupFn - ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Result.TaskDupFn, - CGF.VoidPtrTy) - : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; + Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Result.TaskDupFn, CGF.VoidPtrTy) + : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); } diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index 493cd627e4..71797e2e6f 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -4363,7 +4363,18 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, CodeGen); }; - EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); + if (Data.Nogroup) + EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); + else { + CGM.getOpenMPRuntime().emitTaskgroupRegion( + *this, + [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); + }, + S.getLocStart()); + } } void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { diff --git a/test/OpenMP/taskloop_codegen.cpp b/test/OpenMP/taskloop_codegen.cpp index bc7367ce02..b1b4545580 100644 --- a/test/OpenMP/taskloop_codegen.cpp +++ b/test/OpenMP/taskloop_codegen.cpp @@ -8,6 +8,7 @@ // CHECK-LABEL: @main int main(int argc, char **argv) { // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* [[DEFLOC:@.+]]) +// CHECK: call void @__kmpc_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]]) // CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 33, i64 72, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK1:@.+]] to i32 (i32, i8*)*)) // CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* // CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 @@ -19,6 +20,7 @@ int main(int argc, char **argv) { // CHECK: store i64 1, i64* [[ST]], // CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], // CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null) +// CHECK: call void @__kmpc_end_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]]) #pragma omp taskloop priority(argc) for (int i = 0; i < 10; ++i) ; @@ -33,10 +35,11 @@ int main(int argc, char **argv) { // CHECK: store i64 1, i64* [[ST]], // CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], // CHECK: [[GRAINSIZE:%.+]] = zext i32 %{{.+}} to i64 -// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 1, i64 [[GRAINSIZE]], i8* null) +// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 1, i64 [[GRAINSIZE]], i8* null) #pragma omp taskloop nogroup grainsize(argc) for (int i = 0; i < 10; ++i) ; +// CHECK: call void @__kmpc_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]]) // CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 72, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*)) // CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* // CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 @@ -50,6 +53,7 @@ int main(int argc, char **argv) { // CHECK: store i64 1, i64* [[ST]], // CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], // CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 [[IF_INT]], i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 2, i64 4, i8* null) +// CHECK: call void @__kmpc_end_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]]) int i; #pragma omp taskloop if(argc) shared(argc, argv) collapse(2) num_tasks(4) for (i = 0; i < argc; ++i) diff --git a/test/OpenMP/taskloop_simd_codegen.cpp b/test/OpenMP/taskloop_simd_codegen.cpp index 60ba5f2212..48c6f479e9 100644 --- a/test/OpenMP/taskloop_simd_codegen.cpp +++ b/test/OpenMP/taskloop_simd_codegen.cpp @@ -8,6 +8,7 @@ // CHECK-LABEL: @main int main(int argc, char **argv) { // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* [[DEFLOC:@.+]]) +// CHECK: call void @__kmpc_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]]) // CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 33, i64 72, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK1:@.+]] to i32 (i32, i8*)*)) // CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* // CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 @@ -19,6 +20,7 @@ int main(int argc, char **argv) { // CHECK: store i64 1, i64* [[ST]], // CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], // CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null) +// CHECK: call void @__kmpc_end_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]]) #pragma omp taskloop simd priority(argc) for (int i = 0; i < 10; ++i) ; @@ -33,10 +35,11 @@ int main(int argc, char **argv) { // CHECK: store i64 1, i64* [[ST]], // CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], // CHECK: [[GRAINSIZE:%.+]] = zext i32 %{{.+}} to i64 -// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 1, i64 [[GRAINSIZE]], i8* null) +// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 1, i64 [[GRAINSIZE]], i8* null) #pragma omp taskloop simd nogroup grainsize(argc) simdlen(4) for (int i = 0; i < 10; ++i) ; +// CHECK: call void @__kmpc_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]]) // CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 72, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*)) // CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* // CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 @@ -50,6 +53,7 @@ int main(int argc, char **argv) { // CHECK: store i64 1, i64* [[ST]], // CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], // CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 [[IF_INT]], i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 2, i64 4, i8* null) +// CHECK: call void @__kmpc_end_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]]) int i; #pragma omp taskloop simd if(argc) shared(argc, argv) collapse(2) num_tasks(4) safelen(32) for (i = 0; i < argc; ++i) -- 2.40.0