[CUDA] Add -fcuda-flush-denormals-to-zero.

author Justin Lebar <jlebar@google.com>

Tue, 5 Apr 2016 18:26:20 +0000 (18:26 +0000)

committer Justin Lebar <jlebar@google.com>

Tue, 5 Apr 2016 18:26:20 +0000 (18:26 +0000)
author Justin Lebar <jlebar@google.com>
Tue, 5 Apr 2016 18:26:20 +0000 (18:26 +0000)
committer Justin Lebar <jlebar@google.com>
Tue, 5 Apr 2016 18:26:20 +0000 (18:26 +0000)
diff --git a/include/clang/Basic/LangOptions.def b/include/clang/Basic/LangOptions.def

index a62836884200bfbfe5b23162d23ed89f599aab55..43e96d10d626ddedea0c763b98bde7cccb6770d7 100644 (file)
--- a/include/clang/Basic/LangOptions.def
+++ b/include/clang/Basic/LangOptions.def
@@ -173,6 +173,7 @@ LANGOPT(OpenMPIsDevice    , 1, 0, "Generate code only for OpenMP target device")
  LANGOPT(CUDAIsDevice      , 1, 0, "compiling for CUDA device")
  LANGOPT(CUDAAllowVariadicFunctions, 1, 0, "allowing variadic functions in CUDA device code")
  LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr functions as __host__ __device__")
+LANGOPT(CUDADeviceFlushDenormalsToZero, 1, 0, "flushing denormals to zero")
  
  LANGOPT(AssumeSaneOperatorNew , 1, 1, "implicit __attribute__((malloc)) for C++'s new operators")
  LANGOPT(SizedDeallocation , 1, 0, "enable sized deallocation functions")
diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td

index a1befe379bd9a9ca7b845b0372ff3d468d5cad00..9af204399929a98cb566dfdfa2d0741a08d3781b 100644 (file)
--- a/include/clang/Driver/Options.td
+++ b/include/clang/Driver/Options.td
@@ -382,6 +382,9 @@ def cuda_noopt_device_debug : Flag<["--"], "cuda-noopt-device-debug">,
    HelpText<"Enable device-side debug info generation. Disables ptxas optimizations.">;
  def cuda_path_EQ : Joined<["--"], "cuda-path=">, Group<i_Group>,
    HelpText<"CUDA installation path">;
+def fcuda_flush_denormals_to_zero : Flag<["-"], "fcuda-flush-denormals-to-zero">,
+  Flags<[CC1Option]>, HelpText<"Flush denormal floating point values to zero in CUDA device mode.">;
+def fno_cuda_flush_denormals_to_zero : Flag<["-"], "fno-cuda-flush-denormals-to-zero">;
  def dA : Flag<["-"], "dA">, Group<d_Group>;
  def dD : Flag<["-"], "dD">, Group<d_Group>, Flags<[CC1Option]>,
    HelpText<"Print macro definitions in -E mode in addition to normal output">;
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp

index cbd7422e6aae752d28b449de556a02597a285ee8..04c0a116001d10bdd778fa3406cb39c2b0d9f9a8 100644 (file)
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -1768,6 +1768,10 @@ void CodeGenModule::ConstructAttributeList(
      // __syncthreads(), and so can't have certain optimizations applied around
      // them).  LLVM will remove this attribute where it safely can.
      FuncAttrs.addAttribute(llvm::Attribute::Convergent);
+
+    // Respect -fcuda-flush-denormals-to-zero.
+    if (getLangOpts().CUDADeviceFlushDenormalsToZero)
+      FuncAttrs.addAttribute("nvptx-f32ftz", "true");
    }
  
    ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI);
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp

index 11f5bea64ca051fedc4093760356655f03ffed46..f525246e6ef6619c070552355bcde135d7d4101d 100644 (file)
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -472,6 +472,14 @@ void CodeGenModule::Release() {
      getModule().addModuleFlag(llvm::Module::Override, "Cross-DSO CFI", 1);
    }
  
+  if (LangOpts.CUDAIsDevice && getTarget().getTriple().isNVPTX()) {
+    // Indicate whether __nvvm_reflect should be configured to flush denormal
+    // floating point values to 0.  (This corresponds to its "__CUDA_FTZ"
+    // property.)
+    getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-ftz",
+                              LangOpts.CUDADeviceFlushDenormalsToZero ? 1 : 0);
+  }
+
    if (uint32_t PLevel = Context.getLangOpts().PICLevel) {
      llvm::PICLevel::Level PL = llvm::PICLevel::Default;
      switch (PLevel) {
diff --git a/lib/Driver/ToolChains.cpp b/lib/Driver/ToolChains.cpp

index 11ded7cd5eba3fa2a96eff5e5343fca11326c4fa..902338b2884136735458eb81c39b20361be349c7 100644 (file)
--- a/lib/Driver/ToolChains.cpp
+++ b/lib/Driver/ToolChains.cpp
@@ -4208,6 +4208,10 @@ CudaToolChain::addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
    Linux::addClangTargetOptions(DriverArgs, CC1Args);
    CC1Args.push_back("-fcuda-is-device");
  
+  if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
+                         options::OPT_fno_cuda_flush_denormals_to_zero, false))
+    CC1Args.push_back("-fcuda-flush-denormals-to-zero");
+
    if (DriverArgs.hasArg(options::OPT_nocudalib))
      return;
  
diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp

index 4d97b76d753829cd0c1bbfc1f723653fc509bd10..ba56665bc84c1e567babc91dd8894a0782575b03 100644 (file)
--- a/lib/Frontend/CompilerInvocation.cpp
+++ b/lib/Frontend/CompilerInvocation.cpp
@@ -1571,6 +1571,9 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
    if (Args.hasArg(OPT_fno_cuda_host_device_constexpr))
      Opts.CUDAHostDeviceConstexpr = 0;
  
+  if (Opts.CUDAIsDevice && Args.hasArg(OPT_fcuda_flush_denormals_to_zero))
+    Opts.CUDADeviceFlushDenormalsToZero = 1;
+
    if (Opts.ObjC1) {
      if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) {
        StringRef value = arg->getValue();
diff --git a/test/CodeGenCUDA/flush-denormals.cu b/test/CodeGenCUDA/flush-denormals.cu

new file mode 100644 (file)

index 0000000..cab6602
--- /dev/null
+++ b/test/CodeGenCUDA/flush-denormals.cu
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -fcuda-is-device \
+// RUN:   -triple nvptx-nvidia-cuda -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK -check-prefix NOFTZ
+// RUN: %clang_cc1 -fcuda-is-device -fcuda-flush-denormals-to-zero \
+// RUN:   -triple nvptx-nvidia-cuda -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK -check-prefix FTZ
+
+#include "Inputs/cuda.h"
+
+// Checks that device functions get emitted with the "nvptx-f32ftz" attribute
+// set to "true" when we compile CUDA device code with
+// -fcuda-flush-denormals-to-zero.  Further, check that we reflect the presence
+// or absence of -fcuda-flush-denormals-to-zero in a module flag.
+
+// CHECK: define void @foo() #0
+extern "C" __device__ void foo() {}
+
+// FTZ: attributes #0 = {{.*}} "nvptx-f32ftz"="true"
+// NOFTZ-NOT: attributes #0 = {{.*}} "nvptx-f32ftz"
+
+// FTZ:!llvm.module.flags = !{[[MODFLAG:![0-9]+]]}
+// FTZ:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 1}
+
+// NOFTZ:!llvm.module.flags = !{[[MODFLAG:![0-9]+]]}
+// NOFTZ:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 0}
author	Justin Lebar <jlebar@google.com>
	Tue, 5 Apr 2016 18:26:20 +0000 (18:26 +0000)
committer	Justin Lebar <jlebar@google.com>
	Tue, 5 Apr 2016 18:26:20 +0000 (18:26 +0000)
include/clang/Basic/LangOptions.def		patch \| blob \| history
include/clang/Driver/Options.td		patch \| blob \| history
lib/CodeGen/CGCall.cpp		patch \| blob \| history
lib/CodeGen/CodeGenModule.cpp		patch \| blob \| history
lib/Driver/ToolChains.cpp		patch \| blob \| history
lib/Frontend/CompilerInvocation.cpp		patch \| blob \| history
test/CodeGenCUDA/flush-denormals.cu	[new file with mode: 0644]	patch \| blob