From 8a5660d9acf30d389733e6d10fd0211b0afa9e1b Mon Sep 17 00:00:00 2001 From: Gheorghe-Teodor Bercea Date: Fri, 14 Jun 2019 17:58:26 +0000 Subject: [PATCH] [OpenMP] Avoid emitting maps for target link variables when unified memory is used Summary: This patch avoids the emission of maps for target link variables when unified memory is present. Reviewers: ABataev, caomhin Reviewed By: ABataev Subscribers: guansong, jdoerfert, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D60883 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@363435 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CGOpenMPRuntime.cpp | 7 ++++- lib/CodeGen/CGOpenMPRuntime.h | 3 ++ lib/Sema/SemaOpenMP.cpp | 3 +- ..._target_requires_unified_shared_memory.cpp | 29 +++++++------------ 4 files changed, 22 insertions(+), 20 deletions(-) diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 651cca4993..09be478a26 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -8266,7 +8266,8 @@ public: continue; llvm::Optional Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); - if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) + if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || + !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) continue; StructRangeInfoTy PartialStruct; generateInfoForComponentList( @@ -9251,6 +9252,10 @@ bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, return false; } +bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { + return HasRequiresUnifiedSharedMemory; +} + CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( CodeGenModule &CGM) : CGM(CGM) { diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index 1a26620dc7..5be322de23 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -1623,6 +1623,9 @@ public: /// the predefined allocator and translates it into the corresponding address /// space. virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS); + + /// Return whether the unified_shared_memory has been specified. + bool hasRequiresUnifiedSharedMemory() const; }; /// Class supports emissionof SIMD-only code. diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp index 7e75a98070..a4eafb180a 100644 --- a/lib/Sema/SemaOpenMP.cpp +++ b/lib/Sema/SemaOpenMP.cpp @@ -2667,7 +2667,8 @@ public: llvm::Optional Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (VD->hasGlobalStorage() && CS && !CS->capturesVariable(VD) && - (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)) + (Stack->hasRequiresDeclWithClause() || + !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)) return; SourceLocation ELoc = E->getExprLoc(); diff --git a/test/OpenMP/nvptx_target_requires_unified_shared_memory.cpp b/test/OpenMP/nvptx_target_requires_unified_shared_memory.cpp index 6ce2ffb45e..8349649f6a 100644 --- a/test/OpenMP/nvptx_target_requires_unified_shared_memory.cpp +++ b/test/OpenMP/nvptx_target_requires_unified_shared_memory.cpp @@ -26,42 +26,35 @@ int bar(int n){ // CHECK: [[VAR:@.+]] = global double 1.000000e+01 // CHECK: [[VAR_DECL_TGT_LINK_PTR:@.+]] = global double* [[VAR]] -// CHECK: [[OFFLOAD_SIZES:@.+]] = private unnamed_addr constant [3 x i64] [i64 4, i64 8, i64 8] -// CHECK: [[OFFLOAD_MAPTYPES:@.+]] = private unnamed_addr constant [3 x i64] [i64 800, i64 800, i64 531] +// CHECK: [[OFFLOAD_SIZES:@.+]] = private unnamed_addr constant [2 x i64] [i64 4, i64 8] +// CHECK: [[OFFLOAD_MAPTYPES:@.+]] = private unnamed_addr constant [2 x i64] [i64 800, i64 800] // CHECK: [[N_CASTED:%.+]] = alloca i64 // CHECK: [[SUM_CASTED:%.+]] = alloca i64 -// CHECK: [[OFFLOAD_BASEPTRS:%.+]] = alloca [3 x i8*] -// CHECK: [[OFFLOAD_PTRS:%.+]] = alloca [3 x i8*] +// CHECK: [[OFFLOAD_BASEPTRS:%.+]] = alloca [2 x i8*] +// CHECK: [[OFFLOAD_PTRS:%.+]] = alloca [2 x i8*] // CHECK: [[LOAD1:%.+]] = load i64, i64* [[N_CASTED]] // CHECK: [[LOAD2:%.+]] = load i64, i64* [[SUM_CASTED]] -// CHECK: [[BPTR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK: [[BPTR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK: [[BCAST1:%.+]] = bitcast i8** [[BPTR1]] to i64* // CHECK: store i64 [[LOAD1]], i64* [[BCAST1]] -// CHECK: [[BPTR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 0 +// CHECK: [[BPTR2:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 0 // CHECK: [[BCAST2:%.+]] = bitcast i8** [[BPTR2]] to i64* // CHECK: store i64 [[LOAD1]], i64* [[BCAST2]] -// CHECK: [[BPTR3:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK: [[BPTR3:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 1 // CHECK: [[BCAST3:%.+]] = bitcast i8** [[BPTR3]] to i64* // CHECK: store i64 [[LOAD2]], i64* [[BCAST3]] -// CHECK: [[BPTR4:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 1 +// CHECK: [[BPTR4:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 1 // CHECK: [[BCAST4:%.+]] = bitcast i8** [[BPTR4]] to i64* // CHECK: store i64 [[LOAD2]], i64* [[BCAST4]] -// CHECK: [[BPTR5:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK: [[BCAST5:%.+]] = bitcast i8** [[BPTR5]] to double*** -// CHECK: store double** [[VAR_DECL_TGT_LINK_PTR]], double*** [[BCAST5]] -// CHECK: [[BPTR6:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 2 -// CHECK: [[BCAST6:%.+]] = bitcast i8** [[BPTR6]] to double** -// CHECK: store double* [[VAR]], double** [[BCAST6]] +// CHECK: [[BPTR7:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK: [[BPTR8:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 0 -// CHECK: [[BPTR7:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK: [[BPTR8:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 0 - -// CHECK: call i32 @__tgt_target(i64 -1, i8* @{{.*}}.region_id, i32 3, i8** [[BPTR7]], i8** [[BPTR8]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[OFFLOAD_SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[OFFLOAD_MAPTYPES]], i32 0, i32 0)) +// CHECK: call i32 @__tgt_target(i64 -1, i8* @{{.*}}.region_id, i32 2, i8** [[BPTR7]], i8** [[BPTR8]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[OFFLOAD_SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[OFFLOAD_MAPTYPES]], i32 0, i32 0)) #endif -- 2.50.1