granicus.if.org Git - clang/commitdiff
[AMDGPU][GFX9] Set +fp32-denormals for >=gfx900 unless -cl-denorms-are-zero is set
author Konstantin Zhuravlyov <kzhuravl_dev@outlook.com>
Fri, 14 Apr 2017 05:33:57 +0000 (05:33 +0000)
committer Konstantin Zhuravlyov <kzhuravl_dev@outlook.com>
Fri, 14 Apr 2017 05:33:57 +0000 (05:33 +0000)
Differential Revision: https://reviews.llvm.org/D31482

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@300306 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Basic/Targets.cpp
test/CodeGenOpenCL/gfx9-fp32-denorms.cl [new file with mode: 0644]

index fb22d402989b7d6eab00e904db7cd993ac47cc4c..18f503d091c1201893b3f3e8927482301aab3465 100644 (file)
@@ -2112,9 +2112,12 @@ class AMDGPUTargetInfo final : public TargetInfo {
   bool hasFP64:1;
   bool hasFMAF:1;
   bool hasLDEXPF:1;
-  bool hasFullSpeedFP32Denorms:1;
   const AddrSpace AS;
 
+  static bool hasFullSpeedFMAF32(StringRef GPUName) {
+    return parseAMDGCNName(GPUName) >= GK_GFX9;
+  }
+
   static bool isAMDGCN(const llvm::Triple &TT) {
     return TT.getArch() == llvm::Triple::amdgcn;
   }
@@ -2130,7 +2133,6 @@ public:
       hasFP64(false),
       hasFMAF(false),
       hasLDEXPF(false),
-      hasFullSpeedFP32Denorms(false),
       AS(isGenericZero(Triple)){
     if (getTriple().getArch() == llvm::Triple::amdgcn) {
       hasFP64 = true;
@@ -2200,7 +2202,8 @@ public:
         hasFP64Denormals = true;
     }
     if (!hasFP32Denormals)
-      TargetOpts.Features.push_back((Twine(hasFullSpeedFP32Denorms &&
+      TargetOpts.Features.push_back(
+          (Twine(hasFullSpeedFMAF32(TargetOpts.CPU) &&
           !CGOpts.FlushDenorm ? '+' : '-') + Twine("fp32-denormals")).str());
     // Always do not flush fp64 or fp16 denorms.
     if (!hasFP64Denormals && hasFP64)
diff --git a/test/CodeGenOpenCL/gfx9-fp32-denorms.cl b/test/CodeGenOpenCL/gfx9-fp32-denorms.cl
new file mode 100644 (file)
index 0000000..ccb4c6d
--- /dev/null
@@ -0,0 +1,13 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - %s | FileCheck --check-prefix=DEFAULT %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - -target-feature +fp32-denormals %s | FileCheck --check-prefix=FEATURE_FP32_DENORMALS_ON %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - -target-feature -fp32-denormals %s | FileCheck --check-prefix=FEATURE_FP32_DENORMALS_OFF %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - -cl-denorms-are-zero %s | FileCheck --check-prefix=OPT_DENORMS_ARE_ZERO %s
+
+// DEFAULT: +fp32-denormals
+// FEATURE_FP32_DENORMALS_ON: +fp32-denormals
+// FEATURE_FP32_DENORMALS_OFF: -fp32-denormals
+// OPT_DENORMS_ARE_ZERO: -fp32-denormals
+
+kernel void gfx9_fp32_denorms() {}