[OPENMP]Codegen support for allocate directive on global variables.

author Alexey Bataev <a.bataev@hotmail.com>

Thu, 21 Mar 2019 19:35:27 +0000 (19:35 +0000)

committer Alexey Bataev <a.bataev@hotmail.com>

Thu, 21 Mar 2019 19:35:27 +0000 (19:35 +0000)
author Alexey Bataev <a.bataev@hotmail.com>
Thu, 21 Mar 2019 19:35:27 +0000 (19:35 +0000)
committer Alexey Bataev <a.bataev@hotmail.com>
Thu, 21 Mar 2019 19:35:27 +0000 (19:35 +0000)
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp

index 6dbc244bb2913895519b53f2f3b4ba394523f7fa..d3736b7244d8f75d1737730e3b57b0c553d0f69b 100644 (file)
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -8928,6 +8928,30 @@ void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
           " Expected target-based directive.");
  }
  
+bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
+                                                       LangAS &AS) { // AS: output address space
+  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) // null or unannotated decl
+    return false; // AS is not written on this path
+  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
+  switch(A->getAllocatorType()) { // every predefined allocator ends the same way
+  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
+  // The allocators below are not supported; fall back to the default mem space.
+  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
+  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
+  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
+  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
+  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
+  case OMPAllocateDeclAttr::OMPConstMemAlloc:
+  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
+    AS = LangAS::Default; // shared result for all of the cases above
+    return true;
+  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: // treated as impossible here
+    llvm_unreachable("Expected predefined allocator for the variables with the "
+                     "static storage.");
+  }
+  return false; // only reached if no case label matched
+}
+
  CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
      CodeGenModule &CGM)
      : CGM(CGM) {
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h

index 2896a659b9874bd2db93cde09cf7908a9ae5585e..7b2c0f1b9149bdd79ad78048fe77e86e5c69d629 100644 (file)
--- a/lib/CodeGen/CGOpenMPRuntime.h
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -1598,6 +1598,11 @@ public:
    /// Perform check on requires decl to ensure that target architecture
    /// supports unified addressing
    virtual void checkArchForUnifiedAddressing(const OMPRequiresDecl *D) const {}
+
+  /// If \p VD has an OMPAllocateDeclAttr with a predefined allocator, stores
+  /// the address space for it in \p AS and returns true. Returns false,
+  /// leaving \p AS unwritten, when \p VD is null or lacks the attribute.
+  virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS);
  };
  
  /// Class supports emissionof SIMD-only code.
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp

index 59066e8813d4743c3e1c81fe0ccfa21a1b597e54..7de160322692d7cbe74171fabdbfb3019118f26a 100644 (file)
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -4840,6 +4840,34 @@ unsigned CGOpenMPRuntimeNVPTX::getDefaultFirstprivateAddressSpace() const {
    return CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant);
  }
  
+bool CGOpenMPRuntimeNVPTX::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
+                                                            LangAS &AS) { // AS: output address space
+  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) // null or unannotated decl
+    return false; // AS is not written on this path
+  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
+  switch(A->getAllocatorType()) { // only two allocators get a CUDA address space
+  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
+  // The allocators below are not supported; fall back to the default mem space.
+  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
+  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
+  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
+  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
+  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
+    AS = LangAS::Default; // default allocator and the unsupported ones
+    return true;
+  case OMPAllocateDeclAttr::OMPConstMemAlloc:
+    AS = LangAS::cuda_constant; // addrspace(4) in nvptx_allocate_codegen.cpp
+    return true;
+  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
+    AS = LangAS::cuda_shared; // addrspace(3) in nvptx_allocate_codegen.cpp
+    return true;
+  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: // treated as impossible here
+    llvm_unreachable("Expected predefined allocator for the variables with the "
+                     "static storage.");
+  }
+  return false; // only reached if no case label matched
+}
+
  // Get current CudaArch and ignore any unknown values
  static CudaArch getCudaArch(CodeGenModule &CGM) {
    if (!CGM.getTarget().hasFeature("ptx"))
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h

index 8a92c500b8f6367ee9020594a7062714738711ac..6709ae322a6a2db7b7b11479347d9f92c389ea8d 100644 (file)
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -389,6 +389,11 @@ public:
    /// address space by default.
    unsigned getDefaultFirstprivateAddressSpace() const override;
  
+  /// Returns true and sets \p AS when \p VD has an OMPAllocateDeclAttr with a
+  /// predefined allocator: cuda_constant for OMPConstMemAlloc, cuda_shared for
+  /// OMPPTeamMemAlloc, else Default. Returns false if the attribute is absent.
+  bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) override;
+
  private:
    /// Track the execution mode when codegening directives within a target
    /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp

index 3a9df23a495465f51cf6777177e6bdb33e71d5d8..b9d4ee9f8c44bd9376b4790579fbf10397177485 100644 (file)
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -3387,6 +3387,11 @@ LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) {
        return LangAS::cuda_device;
    }
  
+  if (LangOpts.OpenMP) {
+    LangAS AS;
+    if (OpenMPRuntime->hasAllocateAttributeForGlobalVar(D, AS))
+      return AS;
+  }
    return getTargetCodeGenInfo().getGlobalVarAddressSpace(*this, D);
  }
  
diff --git a/test/OpenMP/nvptx_allocate_codegen.cpp b/test/OpenMP/nvptx_allocate_codegen.cpp

new file mode 100644 (file)

index 0000000..e9b9509
--- /dev/null
+++ b/test/OpenMP/nvptx_allocate_codegen.cpp
@@ -0,0 +1,71 @@
+// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-apple-darwin10.6.0 -fopenmp-targets=nvptx64-nvidia-cuda  -emit-llvm-bc -o %t-host.bc %s
+// RUN: %clang_cc1 -verify -fopenmp -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+#pragma omp declare target
+typedef void **omp_allocator_handle_t; // declared locally; no header is included
+extern const omp_allocator_handle_t omp_default_mem_alloc;
+extern const omp_allocator_handle_t omp_large_cap_mem_alloc;
+extern const omp_allocator_handle_t omp_const_mem_alloc;
+extern const omp_allocator_handle_t omp_high_bw_mem_alloc;
+extern const omp_allocator_handle_t omp_low_lat_mem_alloc;
+extern const omp_allocator_handle_t omp_cgroup_mem_alloc;
+extern const omp_allocator_handle_t omp_pteam_mem_alloc;
+extern const omp_allocator_handle_t omp_thread_mem_alloc;
+
+// CHECK-DAG: @{{.+}}St1{{.+}}b{{.+}} = external global i32,
+// CHECK-DAG: @a = global i32 0,
+// CHECK-DAG: @b = addrspace(4) global i32 0,
+// CHECK-DAG: @c = global i32 0,
+// CHECK-DAG: @d = global %struct.St1 zeroinitializer,
+// CHECK-DAG: @{{.+}}ns{{.+}}a{{.+}} = addrspace(3) global i32 0,
+// CHECK-DAG: @{{.+}}main{{.+}}a{{.*}} = internal global i32 0,
+// CHECK-DAG: @{{.+}}ST{{.+}}m{{.+}} = external global i32,
+struct St{ // plain aggregate; nothing else in this test refers to it
+ int a;
+};
+
+struct St1{
+ int a;
+ static int b; // only declared here; the pattern above wants an external global
+#pragma omp allocate(b) allocator(omp_default_mem_alloc)
+} d; // global object d; its allocator comes from the later allocate(d, c) pragma
+
+int a, b, c; // per the patterns above, only b (const allocator) gets addrspace(4)
+#pragma omp allocate(a) allocator(omp_large_cap_mem_alloc)
+#pragma omp allocate(b) allocator(omp_const_mem_alloc)
+#pragma omp allocate(d, c) allocator(omp_high_bw_mem_alloc)
+
+template <class T>
+struct ST {
+  static T m; // declared only; read by foo<T>() further down
+  #pragma omp allocate(m) allocator(omp_low_lat_mem_alloc)
+};
+
+template <class T> T foo() { // instantiated as foo<int>() from main
+  T v; // local variable named by the allocate pragma on the next line
+  #pragma omp allocate(v) allocator(omp_cgroup_mem_alloc)
+  v = ST<T>::m; // reads the static member, so ST<T>::m is referenced
+  return v;
+}
+
+namespace ns{
+  int a; // pteam allocator below; the pattern above wants addrspace(3)
+}
+#pragma omp allocate(ns::a) allocator(omp_pteam_mem_alloc)
+
+int main () {
+  static int a; // function-local static; the pattern above wants an internal global
+#pragma omp allocate(a) allocator(omp_thread_mem_alloc)
+  a=2;
+  double b = 3; // automatic variable; its allocate pragma has no allocator clause
+#pragma omp allocate(b)
+  return (foo<int>()); // instantiates foo<int>, which reads ST<int>::m
+}
+
+extern template int ST<int>::m; // explicit instantiation declaration of ST<int>::m
+#pragma omp end declare target
+#endif
author	Alexey Bataev <a.bataev@hotmail.com>
	Thu, 21 Mar 2019 19:35:27 +0000 (19:35 +0000)
committer	Alexey Bataev <a.bataev@hotmail.com>
	Thu, 21 Mar 2019 19:35:27 +0000 (19:35 +0000)
lib/CodeGen/CGOpenMPRuntime.cpp		patch \| blob \| history
lib/CodeGen/CGOpenMPRuntime.h		patch \| blob \| history
lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp		patch \| blob \| history
lib/CodeGen/CGOpenMPRuntimeNVPTX.h		patch \| blob \| history
lib/CodeGen/CodeGenModule.cpp		patch \| blob \| history
test/OpenMP/nvptx_allocate_codegen.cpp	[new file with mode: 0644]	patch \| blob