From e8ca256e5e3f0705a447f60d297e97457fd28438 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 13 Dec 2017 21:04:20 +0000 Subject: [PATCH] [OPENMP] Add codegen for `nowait` clause in target directives. Added basic codegen for `nowait` clauses in target-based directives. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@320613 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/DiagnosticSemaKinds.td | 4 +- lib/CodeGen/CGOpenMPRuntime.cpp | 53 +++++++++++++++++-- test/OpenMP/target_codegen.cpp | 4 +- test/OpenMP/target_parallel_codegen.cpp | 4 +- test/OpenMP/target_parallel_for_codegen.cpp | 4 +- .../target_parallel_for_simd_codegen.cpp | 4 +- test/OpenMP/target_simd_codegen.cpp | 4 +- test/OpenMP/target_teams_codegen.cpp | 4 +- .../target_teams_distribute_codegen.cpp | 4 +- .../target_teams_distribute_simd_codegen.cpp | 4 +- 10 files changed, 68 insertions(+), 21 deletions(-) diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td index 4792ae3359..22cd7c7545 100644 --- a/include/clang/Basic/DiagnosticSemaKinds.td +++ b/include/clang/Basic/DiagnosticSemaKinds.td @@ -8852,9 +8852,9 @@ def err_omp_function_expected : Error< def err_omp_wrong_cancel_region : Error< "one of 'for', 'parallel', 'sections' or 'taskgroup' is expected">; def err_omp_parent_cancel_region_nowait : Error< - "parent region for 'omp %select{cancellation point/cancel}0' construct cannot be nowait">; + "parent region for 'omp %select{cancellation point|cancel}0' construct cannot be nowait">; def err_omp_parent_cancel_region_ordered : Error< - "parent region for 'omp %select{cancellation point/cancel}0' construct cannot be ordered">; + "parent region for 'omp %select{cancellation point|cancel}0' construct cannot be ordered">; def err_omp_reduction_wrong_type : Error<"reduction type cannot be %select{qualified with 'const', 'volatile' or 'restrict'|a function|a reference|an array}0 type">; def err_omp_wrong_var_in_declare_reduction : Error<"only %select{'omp_priv' or 'omp_orig'|'omp_in' or 'omp_out'}0 variables are allowed in %select{initializer|combiner}0 expression">; def err_omp_declare_reduction_redefinition : Error<"redefinition of user-defined reduction for type %0">; diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 2995e27e39..2359a32d56 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -672,10 +672,18 @@ enum OpenMPRTLFunction { // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t // *arg_types); OMPRTL__tgt_target, + // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, + // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // *arg_types); + OMPRTL__tgt_target_nowait, // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t // *arg_types, int32_t num_teams, int32_t thread_limit); OMPRTL__tgt_target_teams, + // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void + // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t + // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); + OMPRTL__tgt_target_teams_nowait, // Call to void __tgt_register_lib(__tgt_bin_desc *desc); OMPRTL__tgt_register_lib, // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); @@ -2042,6 +2050,22 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); break; } + case OMPRTL__tgt_target_nowait: { + // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, + // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, + // int64_t *arg_types); + llvm::Type *TypeParams[] = {CGM.Int64Ty, + CGM.VoidPtrTy, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int64Ty->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); + break; + } case OMPRTL__tgt_target_teams: { // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, @@ -2060,6 +2084,24 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); break; } + case OMPRTL__tgt_target_teams_nowait: { + // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void + // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t + // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); + llvm::Type *TypeParams[] = {CGM.Int64Ty, + CGM.VoidPtrTy, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int64Ty->getPointerTo(), + CGM.Int32Ty, + CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); + break; + } case OMPRTL__tgt_register_lib: { // Build void __tgt_register_lib(__tgt_bin_desc *desc); QualType ParamTy = @@ -7010,6 +7052,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D); auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D); + bool HasNowait = D.hasClausesOfKind(); // The target region is an outlined function launched by the runtime // via calls __tgt_target() or __tgt_target_teams(). // @@ -7052,15 +7095,19 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, Info.MapTypesArray, NumTeams, NumThreads}; Return = CGF.EmitRuntimeCall( - RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs); + RT.createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait + : OMPRTL__tgt_target_teams), + OffloadingArgs); } else { llvm::Value *OffloadingArgs[] = { DeviceID, OutlinedFnID, PointerNum, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, Info.MapTypesArray}; - Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target), - OffloadingArgs); + Return = CGF.EmitRuntimeCall( + RT.createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait + : OMPRTL__tgt_target), + OffloadingArgs); } // Check the error code and execute the host version if required. diff --git a/test/OpenMP/target_codegen.cpp b/test/OpenMP/target_codegen.cpp index 91a0b83e6d..20bc96b84c 100644 --- a/test/OpenMP/target_codegen.cpp +++ b/test/OpenMP/target_codegen.cpp @@ -111,7 +111,7 @@ int foo(int n) { // CHECK-DAG: [[ADD:%.+]] = add nsw i32 // CHECK-DAG: [[DEVICE:%.+]] = sext i32 [[ADD]] to i64 - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0) + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0 @@ -134,7 +134,7 @@ int foo(int n) { // CHECK: call void [[HVT0_:@.+]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]]) // CHECK-NEXT: br label %[[END]] // CHECK: [[END]] - #pragma omp target device(global + a) + #pragma omp target device(global + a) nowait { static int local1; *plocal = global; diff --git a/test/OpenMP/target_parallel_codegen.cpp b/test/OpenMP/target_parallel_codegen.cpp index ce280b4549..b9d00da02f 100644 --- a/test/OpenMP/target_parallel_codegen.cpp +++ b/test/OpenMP/target_parallel_codegen.cpp @@ -93,14 +93,14 @@ int foo(int n) { double cn[5][n]; TT d; - // CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null, i32 1, i32 0) + // CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null, i32 1, i32 0) // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] // CHECK: call void [[HVT0:@.+]]() // CHECK-NEXT: br label %[[END]] // CHECK: [[END]] - #pragma omp target parallel + #pragma omp target parallel nowait { } diff --git a/test/OpenMP/target_parallel_for_codegen.cpp b/test/OpenMP/target_parallel_for_codegen.cpp index 55dd4d2a57..58ac10bf9b 100644 --- a/test/OpenMP/target_parallel_for_codegen.cpp +++ b/test/OpenMP/target_parallel_for_codegen.cpp @@ -116,7 +116,7 @@ int foo(int n) { a += 1; } - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT2]], i32 0, i32 0), i32 1, i32 0) + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT2]], i32 0, i32 0), i32 1, i32 0) // CHECK-DAG: [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 0 @@ -145,7 +145,7 @@ int foo(int n) { // CHECK-NEXT: br label %[[END]] // CHECK: [[END]] int lin = 12; - #pragma omp target parallel for if(target: 1) linear(lin, a : get_val()) + #pragma omp target parallel for if(target: 1) linear(lin, a : get_val()) nowait for (unsigned long long it = 2000; it >= 600; it-=400) { aa += 1; } diff --git a/test/OpenMP/target_parallel_for_simd_codegen.cpp b/test/OpenMP/target_parallel_for_simd_codegen.cpp index 1ed8d3a8bf..d168214df3 100644 --- a/test/OpenMP/target_parallel_for_simd_codegen.cpp +++ b/test/OpenMP/target_parallel_for_simd_codegen.cpp @@ -95,14 +95,14 @@ int foo(int n) { double cn[5][n]; TT d; - // CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null, i32 1, i32 0) + // CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null, i32 1, i32 0) // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] // CHECK: call void [[HVT0:@.+]]() // CHECK-NEXT: br label %[[END]] // CHECK: [[END]] - #pragma omp target parallel for simd + #pragma omp target parallel for simd nowait for (int i = 3; i < 32; i += 5) { } diff --git a/test/OpenMP/target_simd_codegen.cpp b/test/OpenMP/target_simd_codegen.cpp index edabfc5eaa..d57e381855 100644 --- a/test/OpenMP/target_simd_codegen.cpp +++ b/test/OpenMP/target_simd_codegen.cpp @@ -92,14 +92,14 @@ int foo(int n) { double cn[5][n]; TT d; - // CHECK: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null) + // CHECK: [[RET:%.+]] = call i32 @__tgt_target_nowait(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null) // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] // CHECK: call void [[HVT0:@.+]]() // CHECK-NEXT: br label %[[END]] // CHECK: [[END]] - #pragma omp target simd + #pragma omp target simd nowait for (int i = 3; i < 32; i += 5) { } diff --git a/test/OpenMP/target_teams_codegen.cpp b/test/OpenMP/target_teams_codegen.cpp index 284a147f28..e6c250178d 100644 --- a/test/OpenMP/target_teams_codegen.cpp +++ b/test/OpenMP/target_teams_codegen.cpp @@ -97,7 +97,7 @@ int foo(int n) { double cn[5][n]; TT d; - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i32 {{[^,]+}}, i32 {{[^)]+}}) + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i32 {{[^,]+}}, i32 {{[^)]+}}) // CHECK-DAG: [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]] @@ -124,7 +124,7 @@ int foo(int n) { // CHECK: call void [[HVT0:@.+]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}}) // CHECK-NEXT: br label %[[END]] // CHECK: [[END]] - #pragma omp target teams num_teams(a) thread_limit(a) firstprivate(aa) + #pragma omp target teams num_teams(a) thread_limit(a) firstprivate(aa) nowait { } diff --git a/test/OpenMP/target_teams_distribute_codegen.cpp b/test/OpenMP/target_teams_distribute_codegen.cpp index b3eae6c4bd..17baa603d1 100644 --- a/test/OpenMP/target_teams_distribute_codegen.cpp +++ b/test/OpenMP/target_teams_distribute_codegen.cpp @@ -97,7 +97,7 @@ int foo(int n) { double cn[5][n]; TT d; - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i32 {{[^,]+}}, i32 {{[^)]+}}) + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i32 {{[^,]+}}, i32 {{[^)]+}}) // CHECK-DAG: [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]] @@ -124,7 +124,7 @@ int foo(int n) { // CHECK: call void [[HVT0:@.+]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}}) // CHECK-NEXT: br label %[[END]] // CHECK: [[END]] - #pragma omp target teams distribute num_teams(a) thread_limit(a) firstprivate(aa) + #pragma omp target teams distribute num_teams(a) thread_limit(a) firstprivate(aa) nowait for (int i = 0; i < 10; ++i) { } diff --git a/test/OpenMP/target_teams_distribute_simd_codegen.cpp b/test/OpenMP/target_teams_distribute_simd_codegen.cpp index b7f031fce7..7fb76d9198 100644 --- a/test/OpenMP/target_teams_distribute_simd_codegen.cpp +++ b/test/OpenMP/target_teams_distribute_simd_codegen.cpp @@ -97,7 +97,7 @@ int foo(int n) { double cn[5][n]; TT d; - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i32 {{[^,]+}}, i32 {{[^)]+}}) + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i32 {{[^,]+}}, i32 {{[^)]+}}) // CHECK-DAG: [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]] @@ -124,7 +124,7 @@ int foo(int n) { // CHECK: call void [[HVT0:@.+]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}}) // CHECK-NEXT: br label %[[END]] // CHECK: [[END]] - #pragma omp target teams distribute simd num_teams(a) thread_limit(a) firstprivate(aa) simdlen(16) + #pragma omp target teams distribute simd num_teams(a) thread_limit(a) firstprivate(aa) simdlen(16) nowait for (int i = 0; i < 10; ++i) { } -- 2.40.0