DAG: Check no signed zeros instead of unsafe math attribute

author Matt Arsenault <Matthew.Arsenault@amd.com>

Thu, 9 Mar 2017 01:36:39 +0000 (01:36 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Thu, 9 Mar 2017 01:36:39 +0000 (01:36 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Thu, 9 Mar 2017 01:36:39 +0000 (01:36 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Thu, 9 Mar 2017 01:36:39 +0000 (01:36 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 8bed34497f673c51c4dc22e17f93c4ad4f9bd889..ba8f12e8d3142fb28644147e08601be538af415c 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9142,7 +9142,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
                         GetNegatedExpression(N0, DAG, LegalOperations), Flags);
  
    // FIXME: Auto-upgrade the target/function-level option.
-  if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) {
+  if (Options.NoSignedZerosFPMath || N->getFlags()->hasNoSignedZeros()) {
      // fold (fadd A, 0) -> A
      if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
        if (N1C->isZero())
@@ -9285,7 +9285,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
                         GetNegatedExpression(N1, DAG, LegalOperations), Flags);
  
    // FIXME: Auto-upgrade the target/function-level option.
-  if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) {
+  if (Options.NoSignedZerosFPMath  || N->getFlags()->hasNoSignedZeros()) {
      // (fsub 0, B) -> -B
      if (N0CFP && N0CFP->isZero()) {
        if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
diff --git a/test/CodeGen/AMDGPU/fadd.ll b/test/CodeGen/AMDGPU/fadd.ll

index 0f683f7bfa23bfe5cfc7a7ab19f833e0370908b5..989eb03268ea7820bb61117fbb1f32270a6c7eee 100644 (file)
--- a/test/CodeGen/AMDGPU/fadd.ll
+++ b/test/CodeGen/AMDGPU/fadd.ll
@@ -5,7 +5,7 @@
  ; FUNC-LABEL: {{^}}fadd_f32:
  ; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
  ; SI: v_add_f32
-define void @fadd_f32(float addrspace(1)* %out, float %a, float %b) {
+define void @fadd_f32(float addrspace(1)* %out, float %a, float %b) #0 {
     %add = fadd float %a, %b
     store float %add, float addrspace(1)* %out, align 4
     ret void
@@ -16,7 +16,7 @@ define void @fadd_f32(float addrspace(1)* %out, float %a, float %b) {
  ; R600-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
  ; SI: v_add_f32
  ; SI: v_add_f32
-define void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
+define void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
    %add = fadd <2 x float> %a, %b
    store <2 x float> %add, <2 x float> addrspace(1)* %out, align 8
    ret void
@@ -31,7 +31,7 @@ define void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x flo
  ; SI: v_add_f32
  ; SI: v_add_f32
  ; SI: v_add_f32
-define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
    %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
    %a = load <4 x float>, <4 x float> addrspace(1)* %in, align 16
    %b = load <4 x float>, <4 x float> addrspace(1)* %b_ptr, align 16
@@ -57,8 +57,19 @@ define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)
  ; SI: v_add_f32
  ; SI: v_add_f32
  ; SI: v_add_f32
-define void @fadd_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) {
+define void @fadd_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) #0 {
    %add = fadd <8 x float> %a, %b
    store <8 x float> %add, <8 x float> addrspace(1)* %out, align 32
    ret void
  }
+
+; FUNC-LABEL: {{^}}fadd_0_nsz_attr_f32:
+; SI-NOT: v_add_f32
+define void @fadd_0_nsz_attr_f32(float addrspace(1)* %out, float %a) #1 {
+   %add = fadd float %a, 0.0
+   store float %add, float addrspace(1)* %out, align 4
+   ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" }
\ No newline at end of file
diff --git a/test/CodeGen/AMDGPU/fsub.ll b/test/CodeGen/AMDGPU/fsub.ll

index 967ebc85ecb8e9141bf7eb255d5deeb76593a191..de3ab69448b3990f8dfd58bc9c64977280631a71 100644 (file)
--- a/test/CodeGen/AMDGPU/fsub.ll
+++ b/test/CodeGen/AMDGPU/fsub.ll
@@ -121,5 +121,14 @@ define void @v_fneg_fsub_nsz_false_attribute_f32(float addrspace(1)* %out, float
    ret void
  }
  
+; FUNC-LABEL: {{^}}v_fsub_0_nsz_attribute_f32:
+; SI-NOT: v_sub
+define void @v_fsub_0_nsz_attribute_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %a = load float, float addrspace(1)* %in, align 4
+  %result = fsub float %a, 0.0
+  store float %result, float addrspace(1)* %out, align 4
+  ret void
+}
+
  attributes #0 = { nounwind "no-signed-zeros-fp-math"="true" }
  attributes #1 = { nounwind "no-signed-zeros-fp-math"="false" }
diff --git a/test/CodeGen/AMDGPU/v_mac.ll b/test/CodeGen/AMDGPU/v_mac.ll

index 911207815e9a682fba367c5464d1a3a897875242..290753c4acbaee9b0784119365968d9a6b353bad 100644 (file)
--- a/test/CodeGen/AMDGPU/v_mac.ll
+++ b/test/CodeGen/AMDGPU/v_mac.ll
@@ -113,10 +113,10 @@ entry:
    ret void
  }
  
-; GCN-LABEL: {{^}}unsafe_mad_sub0_src0:
+; GCN-LABEL: {{^}}nsz_mad_sub0_src0:
  ; GCN-NOT: v_mac_f32
  ; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
-define void @unsafe_mad_sub0_src0(float addrspace(1)* %out, float addrspace(1)* %in) #1 {
+define void @nsz_mad_sub0_src0(float addrspace(1)* %out, float addrspace(1)* %in) #1 {
  entry:
    %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
    %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
@@ -173,10 +173,10 @@ entry:
    ret void
  }
  
-; GCN-LABEL: {{^}}unsafe_mad_sub0_src1:
+; GCN-LABEL: {{^}}nsz_mad_sub0_src1:
  ; GCN-NOT: v_mac_f32
  ; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
-define void @unsafe_mad_sub0_src1(float addrspace(1)* %out, float addrspace(1)* %in) #1 {
+define void @nsz_mad_sub0_src1(float addrspace(1)* %out, float addrspace(1)* %in) #1 {
  entry:
    %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
    %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
@@ -281,7 +281,7 @@ bb:
  
  declare i32 @llvm.amdgcn.workitem.id.x() #2
  
-attributes #0 = { nounwind "unsafe-fp-math"="false" }
-attributes #1 = { nounwind "unsafe-fp-math"="true" }
+attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" }
+attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" }
  attributes #2 = { nounwind readnone }
  attributes #3 = { nounwind }
diff --git a/test/CodeGen/AMDGPU/v_mac_f16.ll b/test/CodeGen/AMDGPU/v_mac_f16.ll

index 975ad6a53aac4eff722df574c6c7cd470c132b8c..1b0e6f26090f6eb91129bbc837da6f49172a6845 100644 (file)
--- a/test/CodeGen/AMDGPU/v_mac_f16.ll
+++ b/test/CodeGen/AMDGPU/v_mac_f16.ll
@@ -217,7 +217,7 @@ entry:
    ret void
  }
  
-; GCN-LABEL: {{^}}mac_f16_neg_a_unsafe_fp_math:
+; GCN-LABEL: {{^}}mac_f16_neg_a_nsz_fp_math:
  ; SI: v_cvt_f32_f16_e32 [[CVT_A:v[0-9]+]], v{{[0-9]+}}
  ; SI: v_cvt_f32_f16_e32 [[CVT_B:v[0-9]+]], v{{[0-9]+}}
  ; SI: v_cvt_f32_f16_e32 [[CVT_C:v[0-9]+]], v{{[0-9]+}}
@@ -226,7 +226,7 @@ entry:
  ; VI-NOT: v_mac_f16
  ; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}
  ; GCN:    s_endpgm
-define void @mac_f16_neg_a_unsafe_fp_math(
+define void @mac_f16_neg_a_nsz_fp_math(
      half addrspace(1)* %r,
      half addrspace(1)* %a,
      half addrspace(1)* %b,
@@ -244,7 +244,7 @@ entry:
    ret void
  }
  
-; GCN-LABEL: {{^}}mac_f16_neg_b_unsafe_fp_math:
+; GCN-LABEL: {{^}}mac_f16_neg_b_nsz_fp_math:
  ; SI: v_cvt_f32_f16_e32 [[CVT_A:v[0-9]+]], v{{[0-9]+}}
  ; SI: v_cvt_f32_f16_e32 [[CVT_B:v[0-9]+]], v{{[0-9]+}}
  ; SI: v_cvt_f32_f16_e32 [[CVT_C:v[0-9]+]], v{{[0-9]+}}
@@ -253,7 +253,7 @@ entry:
  ; VI-NOT: v_mac_f16
  ; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}
  ; GCN:    s_endpgm
-define void @mac_f16_neg_b_unsafe_fp_math(
+define void @mac_f16_neg_b_nsz_fp_math(
      half addrspace(1)* %r,
      half addrspace(1)* %a,
      half addrspace(1)* %b,
@@ -271,7 +271,7 @@ entry:
    ret void
  }
  
-; GCN-LABEL: {{^}}mac_f16_neg_c_unsafe_fp_math:
+; GCN-LABEL: {{^}}mac_f16_neg_c_nsz_fp_math:
  ; SI: v_cvt_f32_f16_e32 [[CVT_A:v[0-9]+]], v{{[0-9]+}}
  ; SI: v_cvt_f32_f16_e32 [[CVT_B:v[0-9]+]], v{{[0-9]+}}
  ; SI: v_cvt_f32_f16_e32 [[CVT_C:v[0-9]+]], v{{[0-9]+}}
@@ -280,7 +280,7 @@ entry:
  ; VI-NOT: v_mac_f16
  ; VI:     v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]}}
  ; GCN:    s_endpgm
-define void @mac_f16_neg_c_unsafe_fp_math(
+define void @mac_f16_neg_c_nsz_fp_math(
      half addrspace(1)* %r,
      half addrspace(1)* %a,
      half addrspace(1)* %b,
@@ -552,7 +552,7 @@ entry:
    ret void
  }
  
-; GCN-LABEL: {{^}}mac_v2f16_neg_a_unsafe_fp_math:
+; GCN-LABEL: {{^}}mac_v2f16_neg_a_nsz_fp_math:
  ; SI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], {{v[0-9]+}}
  ; SI: v_cvt_f32_f16_e32 [[CVT1:v[0-9]+]], {{v[0-9]+}}
  ; SI: v_cvt_f32_f16_e32 [[CVT2:v[0-9]+]], {{v[0-9]+}}
@@ -567,7 +567,7 @@ entry:
  ; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
  ; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
  ; GCN:    s_endpgm
-define void @mac_v2f16_neg_a_unsafe_fp_math(
+define void @mac_v2f16_neg_a_nsz_fp_math(
      <2 x half> addrspace(1)* %r,
      <2 x half> addrspace(1)* %a,
      <2 x half> addrspace(1)* %b,
@@ -585,7 +585,7 @@ entry:
    ret void
  }
  
-; GCN-LABEL: {{^}}mac_v2f16_neg_b_unsafe_fp_math:
+; GCN-LABEL: {{^}}mac_v2f16_neg_b_nsz_fp_math:
  ; SI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], {{v[0-9]+}}
  ; SI: v_cvt_f32_f16_e32 [[CVT1:v[0-9]+]], {{v[0-9]+}}
  ; SI: v_cvt_f32_f16_e32 [[CVT2:v[0-9]+]], {{v[0-9]+}}
@@ -600,7 +600,7 @@ entry:
  ; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
  ; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
  ; GCN:    s_endpgm
-define void @mac_v2f16_neg_b_unsafe_fp_math(
+define void @mac_v2f16_neg_b_nsz_fp_math(
      <2 x half> addrspace(1)* %r,
      <2 x half> addrspace(1)* %a,
      <2 x half> addrspace(1)* %b,
@@ -618,7 +618,7 @@ entry:
    ret void
  }
  
-; GCN-LABEL: {{^}}mac_v2f16_neg_c_unsafe_fp_math:
+; GCN-LABEL: {{^}}mac_v2f16_neg_c_nsz_fp_math:
  ; SI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], {{v[0-9]+}}
  ; SI: v_cvt_f32_f16_e32 [[CVT1:v[0-9]+]], {{v[0-9]+}}
  ; SI: v_cvt_f32_f16_e32 [[CVT2:v[0-9]+]], {{v[0-9]+}}
@@ -633,7 +633,7 @@ entry:
  ; VI:     v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
  ; VI:     v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
  ; GCN:    s_endpgm
-define void @mac_v2f16_neg_c_unsafe_fp_math(
+define void @mac_v2f16_neg_c_nsz_fp_math(
      <2 x half> addrspace(1)* %r,
      <2 x half> addrspace(1)* %a,
      <2 x half> addrspace(1)* %b,
@@ -651,5 +651,5 @@ entry:
    ret void
  }
  
-attributes #0 = { nounwind "unsafe-fp-math"="false" }
-attributes #1 = { nounwind "unsafe-fp-math"="true" }
+attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" }
+attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" }
diff --git a/test/CodeGen/X86/fadd-combines.ll b/test/CodeGen/X86/fadd-combines.ll

index 2df0e06dc2528688352f1b51593d36387b1b8d50..28f72f42d01d41d8d713946d75aff1bf97e29f41 100644 (file)
--- a/test/CodeGen/X86/fadd-combines.ll
+++ b/test/CodeGen/X86/fadd-combines.ll
@@ -221,4 +221,4 @@ define <4 x float> @fadd_fadd_x_x_fadd_x_x_4f32(<4 x float> %x) #0 {
    ret <4 x float> %z
  }
  
-attributes #0 = { "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
+attributes #0 = { "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "no-signed-zeros-fp-math"="true" }
diff --git a/test/CodeGen/X86/vec_unsafe-fp-math.ll b/test/CodeGen/X86/vec_unsafe-fp-math.ll

index 827d4184d111bbc6c27a4c9b471e162cc2c38d8a..1c352782fca4fa9507a49968fd91d3d392832d40 100644 (file)
--- a/test/CodeGen/X86/vec_unsafe-fp-math.ll
+++ b/test/CodeGen/X86/vec_unsafe-fp-math.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -enable-unsafe-fp-math -mtriple=x86_64-unknown-unknown -mcpu=corei7 | FileCheck %s
+; RUN: llc < %s -enable-unsafe-fp-math -enable-no-signed-zeros-fp-math -mtriple=x86_64-unknown-unknown -mcpu=corei7 | FileCheck %s
  
  ; Make sure that vectors get the same benefits as scalars when using unsafe-fp-math.
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Thu, 9 Mar 2017 01:36:39 +0000 (01:36 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Thu, 9 Mar 2017 01:36:39 +0000 (01:36 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
test/CodeGen/AMDGPU/fadd.ll		patch \| blob \| history
test/CodeGen/AMDGPU/fsub.ll		patch \| blob \| history
test/CodeGen/AMDGPU/v_mac.ll		patch \| blob \| history
test/CodeGen/AMDGPU/v_mac_f16.ll		patch \| blob \| history
test/CodeGen/X86/fadd-combines.ll		patch \| blob \| history
test/CodeGen/X86/vec_unsafe-fp-math.ll		patch \| blob \| history