[CUDA] Detect and link with CUDA's libdevice bitcode library.

author Artem Belevich <tra@google.com>

Tue, 17 Nov 2015 22:28:50 +0000 (22:28 +0000)

committer Artem Belevich <tra@google.com>

Tue, 17 Nov 2015 22:28:50 +0000 (22:28 +0000)
author Artem Belevich <tra@google.com>
Tue, 17 Nov 2015 22:28:50 +0000 (22:28 +0000)
committer Artem Belevich <tra@google.com>
Tue, 17 Nov 2015 22:28:50 +0000 (22:28 +0000)
diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td

index 0e1d3035e42426bcb8e925a0fc866d5e0d4fad01..7141d1a6c14a3f29a15439a514284792a6ecc139 100644 (file)
--- a/include/clang/Driver/Options.td
+++ b/include/clang/Driver/Options.td
@@ -1597,6 +1597,7 @@ def no__dead__strip__inits__and__terms : Flag<["-"], "no_dead_strip_inits_and_te
  def nobuiltininc : Flag<["-"], "nobuiltininc">, Flags<[CC1Option]>,
    HelpText<"Disable builtin #include directories">;
  def nocudainc : Flag<["-"], "nocudainc">;
+def nocudalib : Flag<["-"], "nocudalib">;
  def nodefaultlibs : Flag<["-"], "nodefaultlibs">;
  def nofixprebinding : Flag<["-"], "nofixprebinding">;
  def nolibc : Flag<["-"], "nolibc">;
diff --git a/lib/Driver/ToolChains.cpp b/lib/Driver/ToolChains.cpp

index 34504732a122d590af603aba75e7c88aaba627ee..531e7900b1190a535a71dda535a0508128ea8431 100644 (file)
--- a/lib/Driver/ToolChains.cpp
+++ b/lib/Driver/ToolChains.cpp
@@ -1647,6 +1647,31 @@ void Generic_GCC::CudaInstallationDetector::init(
            D.getVFS().exists(CudaLibDevicePath)))
        continue;
  
+    std::error_code EC;
+    for (llvm::sys::fs::directory_iterator LI(CudaLibDevicePath, EC), LE;
+         !EC && LI != LE; LI = LI.increment(EC)) {
+      StringRef FilePath = LI->path();
+      StringRef FileName = llvm::sys::path::filename(FilePath);
+      // Process all bitcode filenames that look like libdevice.compute_XX.YY.bc
+      const StringRef LibDeviceName = "libdevice.";
+      if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
+        continue;
+      StringRef GpuArch = FileName.slice(
+          LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
+      CudaLibDeviceMap[GpuArch] = FilePath.str();
+      // Insert map entries for specific devices with this compute capability.
+      if (GpuArch == "compute_20") {
+        CudaLibDeviceMap["sm_20"] = FilePath;
+        CudaLibDeviceMap["sm_21"] = FilePath;
+      } else if (GpuArch == "compute_30") {
+        CudaLibDeviceMap["sm_30"] = FilePath;
+        CudaLibDeviceMap["sm_32"] = FilePath;
+      } else if (GpuArch == "compute_35") {
+        CudaLibDeviceMap["sm_35"] = FilePath;
+        CudaLibDeviceMap["sm_37"] = FilePath;
+      }
+    }
+
      IsValid = true;
      break;
    }
@@ -4195,6 +4220,22 @@ CudaToolChain::addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
                                       llvm::opt::ArgStringList &CC1Args) const {
    Linux::addClangTargetOptions(DriverArgs, CC1Args);
    CC1Args.push_back("-fcuda-is-device");
+
+  if (DriverArgs.hasArg(options::OPT_nocudalib))
+    return;
+
+  std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(
+      DriverArgs.getLastArgValue(options::OPT_march_EQ));
+  if (!LibDeviceFile.empty()) {
+    CC1Args.push_back("-mlink-cuda-bitcode");
+    CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
+
+    // Libdevice in CUDA-7.0 requires PTX version that's more recent
+    // than LLVM defaults to. Use PTX4.2 which is the PTX version that
+    // came with CUDA-7.0.
+    CC1Args.push_back("-target-feature");
+    CC1Args.push_back("+ptx42");
+  }
  }
  
  llvm::opt::DerivedArgList *
diff --git a/lib/Driver/ToolChains.h b/lib/Driver/ToolChains.h

index b1b670feb85296cae270e5cc79959478a38f34e4..b18739ce3931442860192bff8615695b78778c88 100644 (file)
--- a/lib/Driver/ToolChains.h
+++ b/lib/Driver/ToolChains.h
@@ -166,6 +166,7 @@ protected:
      std::string CudaLibPath;
      std::string CudaLibDevicePath;
      std::string CudaIncludePath;
+    llvm::StringMap<std::string> CudaLibDeviceMap;
  
    public:
      CudaInstallationDetector(const Driver &D) : IsValid(false), D(D) {}
@@ -185,6 +186,9 @@ protected:
      /// \brief Get the detected Cuda device library path.
      StringRef getLibDevicePath() const { return CudaLibDevicePath; }
      /// \brief Get libdevice file for given architecture
+    ///
+    /// Returns the path recorded in CudaLibDeviceMap for \p Gpu, or an empty
+    /// StringRef when the map has no entry for it. The returned StringRef
+    /// points at the string stored in the map, so it is valid only while that
+    /// entry is alive and unmodified.
+    StringRef getLibDeviceFile(StringRef Gpu) const {
+      // Use find() rather than lookup(): lookup() returns the mapped
+      // std::string by value, and a StringRef built from that temporary would
+      // dangle as soon as this function returned.
+      auto Entry = CudaLibDeviceMap.find(Gpu);
+      if (Entry == CudaLibDeviceMap.end())
+        return StringRef();
+      return Entry->second;
+    }
    };
  
    CudaInstallationDetector CudaInstallation;
diff --git a/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/.keep b/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc

similarity index 100%

rename from test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/.keep

rename to test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc
diff --git a/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_35.10.bc b/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_35.10.bc

new file mode 100644 (file)

index 0000000..e69de29
diff --git a/test/Driver/cuda-detect.cu b/test/Driver/cuda-detect.cu

index 182e379df5bc1d015528bf141e5d54bf780bcf2f..085ef61a0ab6947925c066193170130ea0e69b49 100644 (file)
--- a/test/Driver/cuda-detect.cu
+++ b/test/Driver/cuda-detect.cu
@@ -8,26 +8,51 @@
  // RUN: %clang -v --target=i386-unknown-linux \
  // RUN:   --sysroot=%S/Inputs/CUDA 2>&1 | FileCheck %s
  // RUN: %clang -v --target=i386-unknown-linux \
+// RUN:   --sysroot=%S/Inputs/CUDA 2>&1 | FileCheck %s
+// RUN: %clang -v --target=i386-unknown-linux \
  // RUN:   --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 | FileCheck %s
  
-// Verify that CUDA include path gets added
+// Make sure we map libdevice bitcode files to proper GPUs.
+// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_21 \
+// RUN:   --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
+// RUN:   | FileCheck %s -check-prefix COMMON \
+// RUN:     -check-prefix LIBDEVICE -check-prefix LIBDEVICE21
  // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
  // RUN:   --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
-// RUN:   | FileCheck %s -check-prefix COMMON -check-prefix CUDAINC
-// Verify that -nocudainc disables CUDA include paths.
+// RUN:   | FileCheck %s -check-prefix COMMON -check-prefix CUDAINC \
+// RUN:     -check-prefix LIBDEVICE -check-prefix LIBDEVICE35
+// Verify that -nocudainc prevents adding include path to CUDA headers.
  // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
  // RUN:   -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
-// RUN:   | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC
+// RUN:   | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC \
+// RUN:     -check-prefix LIBDEVICE -check-prefix LIBDEVICE35
  // We should not add any CUDA include paths if there's no valid CUDA installation
  // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
  // RUN:   --cuda-path=%S/no-cuda-there %s 2>&1 \
  // RUN:   | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC
  
+// Verify that no options related to bitcode linking are passed if
+// there's no bitcode file.
+// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_30 \
+// RUN:   --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
+// RUN:   | FileCheck %s -check-prefix COMMON -check-prefix NOLIBDEVICE
+// .. or if we explicitly passed -nocudalib
+// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
+// RUN:   -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
+// RUN:   | FileCheck %s -check-prefix COMMON -check-prefix NOLIBDEVICE
+
  // CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA/usr/local/cuda
  // NOCUDA-NOT: Found CUDA installation:
  
  // COMMON: "-triple" "nvptx-nvidia-cuda"
  // COMMON-SAME: "-fcuda-is-device"
+// LIBDEVICE-SAME: "-mlink-cuda-bitcode"
+// NOLIBDEVICE-NOT: "-mlink-cuda-bitcode"
+// LIBDEVICE21-SAME: libdevice.compute_20.10.bc
+// LIBDEVICE35-SAME: libdevice.compute_35.10.bc
+// NOLIBDEVICE-NOT: libdevice.compute_{{.*}}.bc
+// LIBDEVICE-SAME: "-target-feature" "+ptx42"
+// NOLIBDEVICE-NOT: "-target-feature" "+ptx42"
  // CUDAINC-SAME: "-internal-isystem" "{{.*}}/Inputs/CUDA/usr/local/cuda/include"
  // NOCUDAINC-NOT: "-internal-isystem" "{{.*}}/cuda/include"
  // COMMON-SAME: "-x" "cuda"
author	Artem Belevich <tra@google.com>
	Tue, 17 Nov 2015 22:28:50 +0000 (22:28 +0000)
committer	Artem Belevich <tra@google.com>
	Tue, 17 Nov 2015 22:28:50 +0000 (22:28 +0000)
include/clang/Driver/Options.td		patch \| blob \| history
lib/Driver/ToolChains.cpp		patch \| blob \| history
lib/Driver/ToolChains.h		patch \| blob \| history
test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc	[moved from test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/.keep with 100% similarity]	patch \| blob \| history
test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_35.10.bc	[new file with mode: 0644]	patch \| blob
test/Driver/cuda-detect.cu		patch \| blob \| history