DAG: Recognize no-signed-zeros-fp-math attribute

author Matt Arsenault <Matthew.Arsenault@amd.com>

Wed, 25 Jan 2017 06:08:42 +0000 (06:08 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Wed, 25 Jan 2017 06:08:42 +0000 (06:08 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Wed, 25 Jan 2017 06:08:42 +0000 (06:08 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Wed, 25 Jan 2017 06:08:42 +0000 (06:08 +0000)
diff --git a/include/llvm/CodeGen/CommandFlags.h b/include/llvm/CodeGen/CommandFlags.h

index aab522d00de75fabf4d1865f190437834c081a87..9d5943fa55247639c50c32861414673255e9d82c 100644 (file)
--- a/include/llvm/CodeGen/CommandFlags.h
+++ b/include/llvm/CodeGen/CommandFlags.h
@@ -143,6 +143,12 @@ EnableNoNaNsFPMath("enable-no-nans-fp-math",
                     cl::desc("Enable FP math optimizations that assume no NaNs"),
                     cl::init(false));
  
+cl::opt<bool>
+EnableNoSignedZerosFPMath("enable-no-signed-zeros-fp-math",
+                          cl::desc("Enable FP math optimizations that assume "
+                                   "the sign of 0 is insignificant"),
+                          cl::init(false));
+
  cl::opt<bool>
  EnableNoTrappingFPMath("enable-no-trapping-fp-math",
                         cl::desc("Enable setting the FP exceptions build "
@@ -282,6 +288,7 @@ static inline TargetOptions InitTargetOptionsFromCodeGenFlags() {
    Options.UnsafeFPMath = EnableUnsafeFPMath;
    Options.NoInfsFPMath = EnableNoInfsFPMath;
    Options.NoNaNsFPMath = EnableNoNaNsFPMath;
+  Options.NoSignedZerosFPMath = EnableNoSignedZerosFPMath;
    Options.NoTrappingFPMath = EnableNoTrappingFPMath;
    Options.FPDenormalMode = DenormalMode;
    Options.HonorSignDependentRoundingFPMathOption =
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h

index f73afb4706e17bb6fc4ab593a9ca7389d7accc15..ee73dcf9e6f9dfa8a50ff23d2e1f9d7a7fdc0d76 100644 (file)
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -153,11 +153,17 @@ namespace llvm {
      /// assume the FP arithmetic arguments and results are never NaNs.
      unsigned NoNaNsFPMath : 1;
  
-    /// NoTrappingFPMath - This flag is enabled when the 
-    /// -enable-no-trapping-fp-math is specified on the command line. This 
+    /// NoTrappingFPMath - This flag is enabled when the
+    /// -enable-no-trapping-fp-math is specified on the command line. This
      /// specifies that there are no trap handlers to handle exceptions.
      unsigned NoTrappingFPMath : 1;
  
+    /// NoSignedZerosFPMath - This flag is enabled when the
+    /// -enable-no-signed-zeros-fp-math is specified on the command line. This
+    /// specifies that optimizations are allowed to treat the sign of a zero
+    /// argument or result as insignificant.
+    unsigned NoSignedZerosFPMath : 1;
+
      /// HonorSignDependentRoundingFPMath - This returns true when the
      /// -enable-sign-dependent-rounding-fp-math is specified.  If this returns
      /// false (the default), the code generator is allowed to assume that the
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index ce2e52b7128b612a7f6756a0ac7dd6ca9d739176..dd887a2985b969d512884ae065688a81d1735edc 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -635,7 +635,8 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
                                Depth + 1);
    case ISD::FSUB:
      // We can't turn -(A-B) into B-A when we honor signed zeros.
-    if (!Options->UnsafeFPMath && !Op.getNode()->getFlags()->hasNoSignedZeros())
+    if (!Options->NoSignedZerosFPMath &&
+        !Op.getNode()->getFlags()->hasNoSignedZeros())
        return 0;
  
      // fold (fneg (fsub A, B)) -> (fsub B, A)
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp

index 2b3f282f3635f5eb8463f3f64e2cea1b59b34f34..438c62dd5aebcaf44968a04d021d8b0f358fc792 100644 (file)
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -84,6 +84,7 @@ void TargetMachine::resetTargetOptions(const Function &F) const {
    RESET_OPTION(UnsafeFPMath, "unsafe-fp-math");
    RESET_OPTION(NoInfsFPMath, "no-infs-fp-math");
    RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math");
+  RESET_OPTION(NoSignedZerosFPMath, "no-signed-zeros-fp-math");
    RESET_OPTION(NoTrappingFPMath, "no-trapping-math");
  
    StringRef Denormal =
diff --git a/test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll b/test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll

new file mode 100644 (file)

index 0000000..76b50b5
--- /dev/null
+++ b/test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=amdgcn -enable-no-signed-zeros-fp-math=0 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SAFE %s
+; RUN: llc -march=amdgcn -enable-no-signed-zeros-fp-math=1 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE %s
+; RUN: llc -march=amdgcn -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE %s
+
+; Test that the -enable-no-signed-zeros-fp-math flag works
+
+; GCN-LABEL: {{^}}fneg_fsub_f32:
+; GCN: v_subrev_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
+; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
+
+; GCN-UNSAFE-NOT: xor
+define void @fneg_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
+  %a = load float, float addrspace(1)* %in, align 4
+  %b = load float, float addrspace(1)* %b_ptr, align 4
+  %result = fsub float %a, %b
+  %neg.result = fsub float -0.0, %result
+  store float %neg.result, float addrspace(1)* %out, align 4
+  ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AMDGPU/fsub.ll b/test/CodeGen/AMDGPU/fsub.ll

index 3429df33c015f80bad2646bdaa7542229c5efc2a..a92035f2235809a84bcfe85505771cdd61cc15a0 100644 (file)
--- a/test/CodeGen/AMDGPU/fsub.ll
+++ b/test/CodeGen/AMDGPU/fsub.ll
@@ -69,3 +69,61 @@ define void @s_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x f
    store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16
    ret void
  }
+
+; FUNC-LABEL: {{^}}v_fneg_fsub_f32:
+; SI: v_subrev_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
+define void @v_fneg_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
+  %a = load float, float addrspace(1)* %in, align 4
+  %b = load float, float addrspace(1)* %b_ptr, align 4
+  %result = fsub float %a, %b
+  %neg.result = fsub float -0.0, %result
+  store float %neg.result, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_f32:
+; SI: v_subrev_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
+; SI-NOT: xor
+define void @v_fneg_fsub_nsz_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
+  %a = load float, float addrspace(1)* %in, align 4
+  %b = load float, float addrspace(1)* %b_ptr, align 4
+  %result = fsub nsz float %a, %b
+  %neg.result = fsub float -0.0, %result
+  store float %neg.result, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_attribute_f32:
+; SI: v_subrev_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
+; SI-NOT: xor
+define void @v_fneg_fsub_nsz_attribute_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
+  %a = load float, float addrspace(1)* %in, align 4
+  %b = load float, float addrspace(1)* %b_ptr, align 4
+  %result = fsub float %a, %b
+  %neg.result = fsub float -0.0, %result
+  store float %neg.result, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; The attribute's value is a string, "true" or "false", so make sure
+; the option stays disabled and the fneg is not folded when the value
+; is not "true".
+; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_false_attribute_f32:
+; SI: v_subrev_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
+define void @v_fneg_fsub_nsz_false_attribute_f32(float addrspace(1)* %out, float addrspace(1)* %in) #1 {
+  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
+  %a = load float, float addrspace(1)* %in, align 4
+  %b = load float, float addrspace(1)* %b_ptr, align 4
+  %result = fsub float %a, %b
+  %neg.result = fsub float -0.0, %result
+  store float %neg.result, float addrspace(1)* %out, align 4
+  ret void
+}
+
+attributes #0 = { nounwind "no-signed-zeros-fp-math"="true" }
+attributes #1 = { nounwind "no-signed-zeros-fp-math"="false" }
diff --git a/test/CodeGen/X86/negative-sin.ll b/test/CodeGen/X86/negative-sin.ll

index 16258f4794028ca4a883cec75c3e7f92286778ef..bc38021b5620cc4bbbbf4c25cc8caa3b24b3dc4b 100644 (file)
--- a/test/CodeGen/X86/negative-sin.ll
+++ b/test/CodeGen/X86/negative-sin.ll
@@ -101,5 +101,5 @@ define double @fn_attr(double %e) nounwind #0 {
    ret double %h
  }
  
-attributes #0 = { "unsafe-fp-math"="true" }
+attributes #0 = { "unsafe-fp-math"="true" "no-signed-zeros-fp-math"="true" }
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Wed, 25 Jan 2017 06:08:42 +0000 (06:08 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Wed, 25 Jan 2017 06:08:42 +0000 (06:08 +0000)
include/llvm/CodeGen/CommandFlags.h		patch | blob | history
include/llvm/Target/TargetOptions.h		patch | blob | history
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch | blob | history
lib/Target/TargetMachine.cpp		patch | blob | history
test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll	[new file with mode: 0644]	patch | blob
test/CodeGen/AMDGPU/fsub.ll		patch | blob | history
test/CodeGen/X86/negative-sin.ll		patch | blob | history