[OpenMP] Check target architecture supports unified shared memory for requires directive

author Patrick Lyster <Patrick.lyster@ibm.com>

Mon, 19 Nov 2018 15:09:33 +0000 (15:09 +0000)

committer Patrick Lyster <Patrick.lyster@ibm.com>

Mon, 19 Nov 2018 15:09:33 +0000 (15:09 +0000)
author Patrick Lyster <Patrick.lyster@ibm.com>
Mon, 19 Nov 2018 15:09:33 +0000 (15:09 +0000)
committer Patrick Lyster <Patrick.lyster@ibm.com>
Mon, 19 Nov 2018 15:09:33 +0000 (15:09 +0000)
diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp

index e4895fe835cff226891f21e9d3be3d6b829820df..b36b328ad47efdd023a3d17748759cefed0aec25 100644 (file)
--- a/lib/CodeGen/CGDecl.cpp
+++ b/lib/CodeGen/CGDecl.cpp
@@ -2185,5 +2185,5 @@ void CodeGenModule::EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D,
  }
  
  void CodeGenModule::EmitOMPRequiresDecl(const OMPRequiresDecl *D) {
-  //Do nothing - here to avoid build errors
+  getOpenMPRuntime().checkArchForUnifiedAddressing(*this, D);
  }
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h

index 8b8f57c5be45e171dc142a31a803a09e6f2e305e..1a27db157462a8de0aa5a9555d523771c71e1767 100644 (file)
--- a/lib/CodeGen/CGOpenMPRuntime.h
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -1553,6 +1553,11 @@ public:
    virtual void
    adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF,
                                       const OMPExecutableDirective &D) const;
+
+  /// Check a requires declaration to ensure the target architecture supports
+  /// unified addressing. The base implementation is a no-op.
+  virtual void checkArchForUnifiedAddressing(CodeGenModule &CGM,
+                                             const OMPRequiresDecl *D) const {}
  };
  
  /// Class supports emissionof SIMD-only code.
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp

index c9a7c4c3f68183f2c48c0c57626568a2dad8478e..74b24eacce431f1f52ac02a6b525b9500e8f2c81 100644 (file)
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -4547,6 +4547,74 @@ void CGOpenMPRuntimeNVPTX::adjustTargetSpecificDataForLambdas(
    }
  }
  
+// Get the current CudaArch, ignoring features that do not name a known arch.
+static CudaArch getCudaArch(CodeGenModule &CGM) {
+  if (!CGM.getTarget().hasFeature("ptx"))
+    return CudaArch::UNKNOWN;
+  llvm::StringMap<bool> Features;
+  CGM.getTarget().initFeatureMap(Features, CGM.getDiags(),
+                                 CGM.getTarget().getTargetOpts().CPU,
+                                 CGM.getTarget().getTargetOpts().Features);
+  for (const auto &Feature : Features) {
+    if (Feature.getValue()) {
+      CudaArch Arch = StringToCudaArch(Feature.getKey());
+      if (Arch != CudaArch::UNKNOWN)
+        return Arch;
+    }
+  }
+  return CudaArch::UNKNOWN;
+}
+
+/// Check whether the target architecture supports unified addressing, which is
+/// required by the OpenMP requires clause "unified_shared_memory".
+void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing(
+    CodeGenModule &CGM, const OMPRequiresDecl *D) const {
+  for (const OMPClause *Clause : D->clauselists()) {
+    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
+      switch (getCudaArch(CGM)) {
+      case CudaArch::SM_20:
+      case CudaArch::SM_21:
+      case CudaArch::SM_30:
+      case CudaArch::SM_32:
+      case CudaArch::SM_35:
+      case CudaArch::SM_37:
+      case CudaArch::SM_50:
+      case CudaArch::SM_52:
+      case CudaArch::SM_53:
+      case CudaArch::SM_60:
+      case CudaArch::SM_61:
+      case CudaArch::SM_62:
+        CGM.Error(Clause->getBeginLoc(),
+                  "Target architecture does not support unified addressing");
+        return;
+      case CudaArch::SM_70:
+      case CudaArch::SM_72:
+      case CudaArch::SM_75:
+      case CudaArch::GFX600:
+      case CudaArch::GFX601:
+      case CudaArch::GFX700:
+      case CudaArch::GFX701:
+      case CudaArch::GFX702:
+      case CudaArch::GFX703:
+      case CudaArch::GFX704:
+      case CudaArch::GFX801:
+      case CudaArch::GFX802:
+      case CudaArch::GFX803:
+      case CudaArch::GFX810:
+      case CudaArch::GFX900:
+      case CudaArch::GFX902:
+      case CudaArch::GFX904:
+      case CudaArch::GFX906:
+      case CudaArch::GFX909:
+      case CudaArch::UNKNOWN:
+        break;
+      case CudaArch::LAST:
+        llvm_unreachable("Unexpected Cuda arch.");
+      }
+    }
+  }
+}
+
  /// Get number of SMs and number of blocks per SM.
  static std::pair<unsigned, unsigned> getSMsBlocksPerSM(CodeGenModule &CGM) {
    std::pair<unsigned, unsigned> Data;
@@ -4556,55 +4624,45 @@ static std::pair<unsigned, unsigned> getSMsBlocksPerSM(CodeGenModule &CGM) {
      Data.second = CGM.getLangOpts().OpenMPCUDABlocksPerSM;
    if (Data.first && Data.second)
      return Data;
-  if (CGM.getTarget().hasFeature("ptx")) {
-    llvm::StringMap<bool> Features;
-    CGM.getTarget().initFeatureMap(Features, CGM.getDiags(),
-                                   CGM.getTarget().getTargetOpts().CPU,
-                                   CGM.getTarget().getTargetOpts().Features);
-    for (const auto &Feature : Features) {
-      if (Feature.getValue()) {
-        switch (StringToCudaArch(Feature.getKey())) {
-        case CudaArch::SM_20:
-        case CudaArch::SM_21:
-        case CudaArch::SM_30:
-        case CudaArch::SM_32:
-        case CudaArch::SM_35:
-        case CudaArch::SM_37:
-        case CudaArch::SM_50:
-        case CudaArch::SM_52:
-        case CudaArch::SM_53:
-          return {16, 16};
-        case CudaArch::SM_60:
-        case CudaArch::SM_61:
-        case CudaArch::SM_62:
-          return {56, 32};
-        case CudaArch::SM_70:
-        case CudaArch::SM_72:
-        case CudaArch::SM_75:
-          return {84, 32};
-        case CudaArch::GFX600:
-        case CudaArch::GFX601:
-        case CudaArch::GFX700:
-        case CudaArch::GFX701:
-        case CudaArch::GFX702:
-        case CudaArch::GFX703:
-        case CudaArch::GFX704:
-        case CudaArch::GFX801:
-        case CudaArch::GFX802:
-        case CudaArch::GFX803:
-        case CudaArch::GFX810:
-        case CudaArch::GFX900:
-        case CudaArch::GFX902:
-        case CudaArch::GFX904:
-        case CudaArch::GFX906:
-        case CudaArch::GFX909:
-        case CudaArch::UNKNOWN:
-          break;
-        case CudaArch::LAST:
-          llvm_unreachable("Unexpected Cuda arch.");
-        }
-      }
-    }
+  switch (getCudaArch(CGM)) {
+  case CudaArch::SM_20:
+  case CudaArch::SM_21:
+  case CudaArch::SM_30:
+  case CudaArch::SM_32:
+  case CudaArch::SM_35:
+  case CudaArch::SM_37:
+  case CudaArch::SM_50:
+  case CudaArch::SM_52:
+  case CudaArch::SM_53:
+    return {16, 16};
+  case CudaArch::SM_60:
+  case CudaArch::SM_61:
+  case CudaArch::SM_62:
+    return {56, 32};
+  case CudaArch::SM_70:
+  case CudaArch::SM_72:
+  case CudaArch::SM_75:
+    return {84, 32};
+  case CudaArch::GFX600:
+  case CudaArch::GFX601:
+  case CudaArch::GFX700:
+  case CudaArch::GFX701:
+  case CudaArch::GFX702:
+  case CudaArch::GFX703:
+  case CudaArch::GFX704:
+  case CudaArch::GFX801:
+  case CudaArch::GFX802:
+  case CudaArch::GFX803:
+  case CudaArch::GFX810:
+  case CudaArch::GFX900:
+  case CudaArch::GFX902:
+  case CudaArch::GFX904:
+  case CudaArch::GFX906:
+  case CudaArch::GFX909:
+  case CudaArch::UNKNOWN:
+    break;
+  case CudaArch::LAST:
+    llvm_unreachable("Unexpected Cuda arch.");
    }
    llvm_unreachable("Unexpected NVPTX target without ptx feature.");
  }
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h

index aff9cf21135870d82061cb599269900a692b83e9..2acab0735809abe4fb1e6b717175436cf2b5a01f 100644 (file)
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -356,6 +356,11 @@ public:
    void adjustTargetSpecificDataForLambdas(
        CodeGenFunction &CGF, const OMPExecutableDirective &D) const override;
  
+  /// Check a requires declaration to ensure the target architecture supports
+  /// unified addressing.
+  void checkArchForUnifiedAddressing(CodeGenModule &CGM,
+                                     const OMPRequiresDecl *D) const override;
+
  private:
    /// Track the execution mode when codegening directives within a target
    /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the
diff --git a/test/OpenMP/requires_codegen.cpp b/test/OpenMP/requires_codegen.cpp

new file mode 100644 (file)

index 0000000..e94fd28
--- /dev/null
+++ b/test/OpenMP/requires_codegen.cpp
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -DREGION_HOST
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_20 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_21 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_30 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_32 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_35 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_37 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_50 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_52 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_53 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_60 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_61 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_62 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_70 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_NO_ERR
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_72 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_NO_ERR
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_75 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_NO_ERR
+
+#if defined(REGION_HOST) || defined(REGION_DEVICE_NO_ERR)
+// expected-no-diagnostics
+#pragma omp requires unified_shared_memory
+#endif
+
+#ifdef REGION_DEVICE
+#pragma omp requires unified_shared_memory // expected-error {{Target architecture does not support unified addressing}} 
+#endif
author	Patrick Lyster <Patrick.lyster@ibm.com>
	Mon, 19 Nov 2018 15:09:33 +0000 (15:09 +0000)
committer	Patrick Lyster <Patrick.lyster@ibm.com>
	Mon, 19 Nov 2018 15:09:33 +0000 (15:09 +0000)
lib/CodeGen/CGDecl.cpp		patch \| blob \| history
lib/CodeGen/CGOpenMPRuntime.h		patch \| blob \| history
lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp		patch \| blob \| history
lib/CodeGen/CGOpenMPRuntimeNVPTX.h		patch \| blob \| history
test/OpenMP/requires_codegen.cpp	[new file with mode: 0644]	patch \| blob