[CUDA] pass debug options to ptxas.

author Artem Belevich <tra@google.com>

Tue, 16 Feb 2016 22:03:20 +0000 (22:03 +0000)

committer Artem Belevich <tra@google.com>

Tue, 16 Feb 2016 22:03:20 +0000 (22:03 +0000)
author Artem Belevich <tra@google.com>
Tue, 16 Feb 2016 22:03:20 +0000 (22:03 +0000)
committer Artem Belevich <tra@google.com>
Tue, 16 Feb 2016 22:03:20 +0000 (22:03 +0000)
diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td

index 2f691c385cf5f438a6f442a0ee2bd5efb93bff61..7e5dc098b0fcfeb748c67691531bf2fc226d491d 100644 (file)
--- a/include/clang/Driver/Options.td
+++ b/include/clang/Driver/Options.td
@@ -378,6 +378,8 @@ def cuda_gpu_arch_EQ : Joined<["--"], "cuda-gpu-arch=">,
    Flags<[DriverOption, HelpHidden]>, HelpText<"CUDA GPU architecture">;
  def cuda_host_only : Flag<["--"], "cuda-host-only">,
    HelpText<"Do host-side CUDA compilation only">;
+def cuda_noopt_device_debug : Flag<["--"], "cuda-noopt-device-debug">,
+  HelpText<"Enable device-side debug info generation. Disables ptxas optimizations.">;
  def cuda_path_EQ : Joined<["--"], "cuda-path=">, Group<i_Group>,
    HelpText<"CUDA installation path">;
  def dA : Flag<["-"], "dA">, Group<d_Group>;
diff --git a/lib/Driver/Tools.cpp b/lib/Driver/Tools.cpp

index 44d6b8464a5eba4ca9c42b6cb7cf19f20e6342dc..159936760a8bfe6449238c474f48b217bc4d59d3 100644 (file)
--- a/lib/Driver/Tools.cpp
+++ b/lib/Driver/Tools.cpp
@@ -10691,15 +10691,20 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
    assert(gpu_archs.size() == 1 && "Exactly one GPU Arch required for ptxas.");
    const std::string& gpu_arch = gpu_archs[0];
  
-
    ArgStringList CmdArgs;
    CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
+  if (Args.getLastArg(options::OPT_cuda_noopt_device_debug)) {
+    // ptxas does not accept -g option if optimization is enabled, so
+    // we ignore the compiler's -O* options if we want debug info.
+    CmdArgs.push_back("-g");
+    CmdArgs.push_back("--dont-merge-basicblocks");
+    CmdArgs.push_back("--return-at-end");
+  } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
+    // Map the -O we received to -O{0,1,2,3}.
+    //
+    // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
+    // default, so it may correspond more closely to the spirit of clang -O2.
  
-  // Map the -O we received to -O{0,1,2,3}.
-  //
-  // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's default,
-  // so it may correspond more closely to the spirit of clang -O2.
-  if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
      // -O3 seems like the least-bad option when -Osomething is specified to
      // clang but it isn't handled below.
      StringRef OOpt = "3";
@@ -10725,9 +10730,6 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
      CmdArgs.push_back("-O0");
    }
  
-  // Don't bother passing -g to ptxas: It's enabled by default at -O0, and
-  // not supported at other optimization levels.
-
    CmdArgs.push_back("--gpu-name");
    CmdArgs.push_back(Args.MakeArgString(gpu_arch));
    CmdArgs.push_back("--output-file");
diff --git a/test/Driver/cuda-external-tools.cu b/test/Driver/cuda-external-tools.cu

index bdc6a386a7125f0cae424d324c4b95ae4ad336ac..7a6396078915e9f43a914402f1b923dcbfd42f76 100644 (file)
--- a/test/Driver/cuda-external-tools.cu
+++ b/test/Driver/cuda-external-tools.cu
@@ -18,6 +18,10 @@
  // RUN: %clang -### -target x86_64-linux-gnu -Ofast -c %s 2>&1 \
  // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
  
+// With debugging enabled, ptxas should be run with no ptxas optimizations.
+// RUN: %clang -### -target x86_64-linux-gnu --cuda-noopt-device-debug -O2 -c %s 2>&1 \
+// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix DBG %s
+
  // Regular compile without -O.  This should result in us passing -O0 to ptxas.
  // RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \
  // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s
@@ -59,9 +63,14 @@
  // ARCH64: "-m64"
  // ARCH32: "-m32"
  // OPT0: "-O0"
+// OPT0-NOT: "-g"
  // OPT1: "-O1"
+// OPT1-NOT: "-g"
  // OPT2: "-O2"
+// OPT2-NOT: "-g"
  // OPT3: "-O3"
+// OPT3-NOT: "-g"
+// DBG: "-g" "--dont-merge-basicblocks" "--return-at-end"
  // SM20: "--gpu-name" "sm_20"
  // SM35: "--gpu-name" "sm_35"
  // SM20: "--output-file" "[[CUBINFILE:[^"]*]]"
author	Artem Belevich <tra@google.com>
	Tue, 16 Feb 2016 22:03:20 +0000 (22:03 +0000)
committer	Artem Belevich <tra@google.com>
	Tue, 16 Feb 2016 22:03:20 +0000 (22:03 +0000)
include/clang/Driver/Options.td		patch \| blob \| history
lib/Driver/Tools.cpp		patch \| blob \| history
test/Driver/cuda-external-tools.cu		patch \| blob \| history