From: Artem Belevich Date: Tue, 16 Feb 2016 22:03:20 +0000 (+0000) Subject: [CUDA] pass debug options to ptxas. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e88720a6287e5fa399e833ed5ddf4f0b475c37bd;p=clang [CUDA] pass debug options to ptxas. ptxas optimizations are disabled if we need to generate debug info as ptxas does not accept '-g' otherwise. Differential Revision: http://reviews.llvm.org/D17111 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@261018 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td index 2f691c385c..7e5dc098b0 100644 --- a/include/clang/Driver/Options.td +++ b/include/clang/Driver/Options.td @@ -378,6 +378,8 @@ def cuda_gpu_arch_EQ : Joined<["--"], "cuda-gpu-arch=">, Flags<[DriverOption, HelpHidden]>, HelpText<"CUDA GPU architecture">; def cuda_host_only : Flag<["--"], "cuda-host-only">, HelpText<"Do host-side CUDA compilation only">; +def cuda_noopt_device_debug : Flag<["--"], "cuda-noopt-device-debug">, + HelpText<"Enable device-side debug info generation. Disables ptxas optimizations.">; def cuda_path_EQ : Joined<["--"], "cuda-path=">, Group, HelpText<"CUDA installation path">; def dA : Flag<["-"], "dA">, Group; diff --git a/lib/Driver/Tools.cpp b/lib/Driver/Tools.cpp index 44d6b8464a..159936760a 100644 --- a/lib/Driver/Tools.cpp +++ b/lib/Driver/Tools.cpp @@ -10691,15 +10691,20 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA, assert(gpu_archs.size() == 1 && "Exactly one GPU Arch required for ptxas."); const std::string& gpu_arch = gpu_archs[0]; - ArgStringList CmdArgs; CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32"); + if (Args.getLastArg(options::OPT_cuda_noopt_device_debug)) { + // ptxas does not accept -g option if optimization is enabled, so + // we ignore the compiler's -O* options if we want debug info. 
+ CmdArgs.push_back("-g"); + CmdArgs.push_back("--dont-merge-basicblocks"); + CmdArgs.push_back("--return-at-end"); + } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { + // Map the -O we received to -O{0,1,2,3}. + // + // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's + // default, so it may correspond more closely to the spirit of clang -O2. - // Map the -O we received to -O{0,1,2,3}. - // - // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's default, - // so it may correspond more closely to the spirit of clang -O2. - if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { // -O3 seems like the least-bad option when -Osomething is specified to // clang but it isn't handled below. StringRef OOpt = "3"; @@ -10725,9 +10730,6 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-O0"); } - // Don't bother passing -g to ptxas: It's enabled by default at -O0, and - // not supported at other optimization levels. - CmdArgs.push_back("--gpu-name"); CmdArgs.push_back(Args.MakeArgString(gpu_arch)); CmdArgs.push_back("--output-file"); diff --git a/test/Driver/cuda-external-tools.cu b/test/Driver/cuda-external-tools.cu index bdc6a386a7..7a63960789 100644 --- a/test/Driver/cuda-external-tools.cu +++ b/test/Driver/cuda-external-tools.cu @@ -18,6 +18,10 @@ // RUN: %clang -### -target x86_64-linux-gnu -Ofast -c %s 2>&1 \ // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s +// With debugging enabled, ptxas should be run with no ptxas optimizations. +// RUN: %clang -### -target x86_64-linux-gnu --cuda-noopt-device-debug -O2 -c %s 2>&1 \ +// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix DBG %s + // Regular compile without -O. This should result in us passing -O0 to ptxas. 
// RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \ // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s @@ -59,9 +63,14 @@ // ARCH64: "-m64" // ARCH32: "-m32" // OPT0: "-O0" +// OPT0-NOT: "-g" // OPT1: "-O1" +// OPT1-NOT: "-g" // OPT2: "-O2" +// OPT2-NOT: "-g" // OPT3: "-O3" +// OPT3-NOT: "-g" +// DBG: "-g" "--dont-merge-basicblocks" "--return-at-end" // SM20: "--gpu-name" "sm_20" // SM35: "--gpu-name" "sm_35" // SM20: "--output-file" "[[CUBINFILE:[^"]*]]"