[CMake][OpenMP] Customize default offloading arch

author Jonas Hahnfeld <hahnjo@hahnjo.de>

Tue, 17 Oct 2017 13:37:36 +0000 (13:37 +0000)

committer Jonas Hahnfeld <hahnjo@hahnjo.de>

Tue, 17 Oct 2017 13:37:36 +0000 (13:37 +0000)
author Jonas Hahnfeld <hahnjo@hahnjo.de>
Tue, 17 Oct 2017 13:37:36 +0000 (13:37 +0000)
committer Jonas Hahnfeld <hahnjo@hahnjo.de>
Tue, 17 Oct 2017 13:37:36 +0000 (13:37 +0000)
diff --git a/CMakeLists.txt b/CMakeLists.txt

index b55c64d9e05238723a5ae3103b8b35c7daeadee4..42d580077d878a4c855ac91d9e845238e10d279b 100644 (file)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -235,6 +235,17 @@ endif()
  set(CLANG_DEFAULT_OPENMP_RUNTIME "libomp" CACHE STRING
    "Default OpenMP runtime used by -fopenmp.")
  
+# OpenMP offloading requires at least sm_30 (shuffle instructions are used for
+# efficient reductions); malformed or older values are reset to sm_30 below.
+set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING
+  "Default architecture for OpenMP offloading to Nvidia GPUs.")
+string(REGEX MATCH "^sm_([0-9]+)$" MATCHED_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}")
+if (NOT MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 30)
+  message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_30")
+  set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING
+    "Default architecture for OpenMP offloading to Nvidia GPUs." FORCE)
+endif()
+
  set(CLANG_VENDOR ${PACKAGE_VENDOR} CACHE STRING
    "Vendor-specific text for showing with version information.")
  
diff --git a/include/clang/Config/config.h.cmake b/include/clang/Config/config.h.cmake

index b138b5fcd8288dd6a13e9db5da983f0156e757a2..3ee7258b3b34bbeb1399a0ad31c757bb4df6f787 100644 (file)
--- a/include/clang/Config/config.h.cmake
+++ b/include/clang/Config/config.h.cmake
@@ -20,6 +20,9 @@
  /* Default OpenMP runtime used by -fopenmp. */
  #define CLANG_DEFAULT_OPENMP_RUNTIME "${CLANG_DEFAULT_OPENMP_RUNTIME}"
  
+/* Default architecture for OpenMP offloading to Nvidia GPUs. */
+#define CLANG_OPENMP_NVPTX_DEFAULT_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}"
+
  /* Multilib suffix for libdir. */
  #define CLANG_LIBDIR_SUFFIX "${CLANG_LIBDIR_SUFFIX}"
  
diff --git a/lib/Driver/ToolChains/Cuda.cpp b/lib/Driver/ToolChains/Cuda.cpp

index 4d040a204dd304f3265de702794749179ebbbd01..4f740fc9525f6e6501dcdb151f86af8f9c8880ea 100644 (file)
--- a/lib/Driver/ToolChains/Cuda.cpp
+++ b/lib/Driver/ToolChains/Cuda.cpp
@@ -542,9 +542,9 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
    // flags are not duplicated.
    // Also append the compute capability.
    if (DeviceOffloadKind == Action::OFK_OpenMP) {
-    for (Arg *A : Args){
+    for (Arg *A : Args) {
        bool IsDuplicate = false;
-      for (Arg *DALArg : *DAL){
+      for (Arg *DALArg : *DAL) {
          if (A == DALArg) {
            IsDuplicate = true;
            break;
@@ -555,14 +555,9 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
      }
  
      StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
-    if (Arch.empty()) {
-      // Default compute capability for CUDA toolchain is the
-      // lowest compute capability supported by the installed
-      // CUDA version.
-      DAL->AddJoinedArg(nullptr,
-          Opts.getOption(options::OPT_march_EQ),
-          CudaInstallation.getLowestExistingArch());
-    }
+    if (Arch.empty())
+      DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
+                        CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
  
      return DAL;
    }
diff --git a/lib/Driver/ToolChains/Cuda.h b/lib/Driver/ToolChains/Cuda.h

index 5144f5b40bb7421e750f71927f2da4381dcde558..1e30aa72702a7b2b062d192e7c3b79123d3a1811 100644 (file)
--- a/lib/Driver/ToolChains/Cuda.h
+++ b/lib/Driver/ToolChains/Cuda.h
@@ -76,17 +76,6 @@ public:
    std::string getLibDeviceFile(StringRef Gpu) const {
      return LibDeviceMap.lookup(Gpu);
    }
-  /// \brief Get lowest available compute capability
-  /// for which a libdevice library exists.
-  std::string getLowestExistingArch() const {
-    std::string LibDeviceFile;
-    for (auto key : LibDeviceMap.keys()) {
-      LibDeviceFile = LibDeviceMap.lookup(key);
-      if (!LibDeviceFile.empty())
-        return key;
-    }
-    return "sm_20";
-  }
  };
  
  namespace tools {
author	Jonas Hahnfeld <hahnjo@hahnjo.de>
	Tue, 17 Oct 2017 13:37:36 +0000 (13:37 +0000)
committer	Jonas Hahnfeld <hahnjo@hahnjo.de>
	Tue, 17 Oct 2017 13:37:36 +0000 (13:37 +0000)
CMakeLists.txt		patch \| blob \| history
include/clang/Config/config.h.cmake		patch \| blob \| history
lib/Driver/ToolChains/Cuda.cpp		patch \| blob \| history
lib/Driver/ToolChains/Cuda.h		patch \| blob \| history