From: Matt Arsenault
Date: Thu, 9 Mar 2017 01:36:39 +0000 (+0000)
Subject: DAG: Check no signed zeros instead of unsafe math attribute
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=6d62c7135700363cc706ac70ed420a914eab371f;p=llvm

DAG: Check no signed zeros instead of unsafe math attribute

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@297354 91177308-0d34-0410-b5e6-96231b3b80d8
---
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8bed34497f6..ba8f12e8d31 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9142,7 +9142,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
                        GetNegatedExpression(N0, DAG, LegalOperations), Flags);
 
   // FIXME: Auto-upgrade the target/function-level option.
-  if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) {
+  if (Options.NoSignedZerosFPMath || N->getFlags()->hasNoSignedZeros()) {
     // fold (fadd A, 0) -> A
     if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
       if (N1C->isZero())
@@ -9285,7 +9285,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
 
   // FIXME: Auto-upgrade the target/function-level option.
-  if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) {
+  if (Options.NoSignedZerosFPMath || N->getFlags()->hasNoSignedZeros()) {
     // (fsub 0, B) -> -B
     if (N0CFP && N0CFP->isZero()) {
       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
diff --git a/test/CodeGen/AMDGPU/fadd.ll b/test/CodeGen/AMDGPU/fadd.ll
index 0f683f7bfa2..989eb03268e 100644
--- a/test/CodeGen/AMDGPU/fadd.ll
+++ b/test/CodeGen/AMDGPU/fadd.ll
@@ -5,7 +5,7 @@
 ; FUNC-LABEL: {{^}}fadd_f32:
 ; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
 ; SI: v_add_f32
-define void @fadd_f32(float addrspace(1)* %out, float %a, float %b) {
+define void @fadd_f32(float addrspace(1)* %out, float %a, float %b) #0 {
   %add = fadd float %a, %b
   store float %add, float addrspace(1)* %out, align 4
   ret void
@@ -16,7 +16,7 @@ define void @fadd_f32(float addrspace(1)* %out, float %a, float %b) {
 ; R600-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
 ; SI: v_add_f32
 ; SI: v_add_f32
-define void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
+define void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
   %add = fadd <2 x float> %a, %b
   store <2 x float> %add, <2 x float> addrspace(1)* %out, align 8
   ret void
@@ -31,7 +31,7 @@ define void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x flo
 ; SI: v_add_f32
 ; SI: v_add_f32
 ; SI: v_add_f32
-define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
   %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
   %a = load <4 x float>, <4 x float> addrspace(1)* %in, align 16
   %b = load <4 x float>, <4 x float> addrspace(1)* %b_ptr, align 16
@@ -57,8 +57,19 @@ define void @fadd_v4f32(<4 x float> addrspace(1)
 ; SI: v_add_f32
 ; SI: v_add_f32
 ; SI: v_add_f32
-define void @fadd_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) {
+define void @fadd_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) #0 {
   %add = fadd <8 x float> %a, %b
   store <8 x float> %add, <8 x float> addrspace(1)* %out, align 32
   ret void
 }
+
+; FUNC-LABEL: {{^}}fadd_0_nsz_attr_f32:
+; SI-NOT: v_add_f32
+define void @fadd_0_nsz_attr_f32(float addrspace(1)* %out, float %a) #1 {
+  %add = fadd float %a, 0.0
+  store float %add, float addrspace(1)* %out, align 4
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" }
\ No newline at end of file
diff --git a/test/CodeGen/AMDGPU/fsub.ll b/test/CodeGen/AMDGPU/fsub.ll
index 967ebc85ecb..de3ab69448b 100644
--- a/test/CodeGen/AMDGPU/fsub.ll
+++ b/test/CodeGen/AMDGPU/fsub.ll
@@ -121,5 +121,14 @@ define void @v_fneg_fsub_nsz_false_attribute_f32(float addrspace(1)* %out, float
   ret void
 }
 
+; FUNC-LABEL: {{^}}v_fsub_0_nsz_attribute_f32:
+; SI-NOT: v_sub
+define void @v_fsub_0_nsz_attribute_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %a = load float, float addrspace(1)* %in, align 4
+  %result = fsub float %a, 0.0
+  store float %result, float addrspace(1)* %out, align 4
+  ret void
+}
+
 attributes #0 = { nounwind "no-signed-zeros-fp-math"="true" }
 attributes #1 = { nounwind "no-signed-zeros-fp-math"="false" }
diff --git a/test/CodeGen/AMDGPU/v_mac.ll b/test/CodeGen/AMDGPU/v_mac.ll
index 911207815e9..290753c4acb 100644
--- a/test/CodeGen/AMDGPU/v_mac.ll
+++ b/test/CodeGen/AMDGPU/v_mac.ll
@@ -113,10 +113,10 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}unsafe_mad_sub0_src0:
+; GCN-LABEL: {{^}}nsz_mad_sub0_src0:
 ; GCN-NOT: v_mac_f32
 ; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
-define void @unsafe_mad_sub0_src0(float addrspace(1)* %out, float addrspace(1)* %in) #1 {
+define void @nsz_mad_sub0_src0(float addrspace(1)* %out, float addrspace(1)* %in) #1 {
 entry:
   %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
   %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
@@ -173,10 +173,10 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}unsafe_mad_sub0_src1:
+; GCN-LABEL: {{^}}nsz_mad_sub0_src1:
 ; GCN-NOT: v_mac_f32
 ; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
-define void @unsafe_mad_sub0_src1(float addrspace(1)* %out, float addrspace(1)* %in) #1 {
+define void @nsz_mad_sub0_src1(float addrspace(1)* %out, float addrspace(1)* %in) #1 {
 entry:
   %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
   %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
@@ -281,7 +281,7 @@ bb:
 
 declare i32 @llvm.amdgcn.workitem.id.x() #2
 
-attributes #0 = { nounwind "unsafe-fp-math"="false" }
-attributes #1 = { nounwind "unsafe-fp-math"="true" }
+attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" }
+attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" }
 attributes #2 = { nounwind readnone }
 attributes #3 = { nounwind }
diff --git a/test/CodeGen/AMDGPU/v_mac_f16.ll b/test/CodeGen/AMDGPU/v_mac_f16.ll
index 975ad6a53aa..1b0e6f26090 100644
--- a/test/CodeGen/AMDGPU/v_mac_f16.ll
+++ b/test/CodeGen/AMDGPU/v_mac_f16.ll
@@ -217,7 +217,7 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}mac_f16_neg_a_unsafe_fp_math:
+; GCN-LABEL: {{^}}mac_f16_neg_a_nsz_fp_math:
 ; SI: v_cvt_f32_f16_e32 [[CVT_A:v[0-9]+]], v{{[0-9]+}}
 ; SI: v_cvt_f32_f16_e32 [[CVT_B:v[0-9]+]], v{{[0-9]+}}
 ; SI: v_cvt_f32_f16_e32 [[CVT_C:v[0-9]+]], v{{[0-9]+}}
@@ -226,7 +226,7 @@ entry:
 ; VI-NOT: v_mac_f16
 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}
 ; GCN: s_endpgm
-define void @mac_f16_neg_a_unsafe_fp_math(
+define void @mac_f16_neg_a_nsz_fp_math(
     half addrspace(1)* %r,
     half addrspace(1)* %a,
     half addrspace(1)* %b,
@@ -244,7 +244,7 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}mac_f16_neg_b_unsafe_fp_math:
+; GCN-LABEL: {{^}}mac_f16_neg_b_nsz_fp_math:
 ; SI: v_cvt_f32_f16_e32 [[CVT_A:v[0-9]+]], v{{[0-9]+}}
 ; SI: v_cvt_f32_f16_e32 [[CVT_B:v[0-9]+]], v{{[0-9]+}}
 ; SI: v_cvt_f32_f16_e32 [[CVT_C:v[0-9]+]], v{{[0-9]+}}
@@ -253,7 +253,7 @@ entry:
 ; VI-NOT: v_mac_f16
 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}
 ; GCN: s_endpgm
-define void @mac_f16_neg_b_unsafe_fp_math(
+define void @mac_f16_neg_b_nsz_fp_math(
     half addrspace(1)* %r,
     half addrspace(1)* %a,
     half addrspace(1)* %b,
@@ -271,7 +271,7 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}mac_f16_neg_c_unsafe_fp_math:
+; GCN-LABEL: {{^}}mac_f16_neg_c_nsz_fp_math:
 ; SI: v_cvt_f32_f16_e32 [[CVT_A:v[0-9]+]], v{{[0-9]+}}
 ; SI: v_cvt_f32_f16_e32 [[CVT_B:v[0-9]+]], v{{[0-9]+}}
 ; SI: v_cvt_f32_f16_e32 [[CVT_C:v[0-9]+]], v{{[0-9]+}}
@@ -280,7 +280,7 @@ entry:
 ; VI-NOT: v_mac_f16
 ; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]}}
 ; GCN: s_endpgm
-define void @mac_f16_neg_c_unsafe_fp_math(
+define void @mac_f16_neg_c_nsz_fp_math(
     half addrspace(1)* %r,
     half addrspace(1)* %a,
     half addrspace(1)* %b,
@@ -552,7 +552,7 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}mac_v2f16_neg_a_unsafe_fp_math:
+; GCN-LABEL: {{^}}mac_v2f16_neg_a_nsz_fp_math:
 ; SI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], {{v[0-9]+}}
 ; SI: v_cvt_f32_f16_e32 [[CVT1:v[0-9]+]], {{v[0-9]+}}
 ; SI: v_cvt_f32_f16_e32 [[CVT2:v[0-9]+]], {{v[0-9]+}}
@@ -567,7 +567,7 @@ entry:
 ; VI-NOT: v_mac_f16
 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
 ; GCN: s_endpgm
-define void @mac_v2f16_neg_a_unsafe_fp_math(
+define void @mac_v2f16_neg_a_nsz_fp_math(
     <2 x half> addrspace(1)* %r,
     <2 x half> addrspace(1)* %a,
     <2 x half> addrspace(1)* %b,
@@ -585,7 +585,7 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}mac_v2f16_neg_b_unsafe_fp_math:
+; GCN-LABEL: {{^}}mac_v2f16_neg_b_nsz_fp_math:
 ; SI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], {{v[0-9]+}}
 ; SI: v_cvt_f32_f16_e32 [[CVT1:v[0-9]+]], {{v[0-9]+}}
 ; SI: v_cvt_f32_f16_e32 [[CVT2:v[0-9]+]], {{v[0-9]+}}
@@ -600,7 +600,7 @@ entry:
 ; VI-NOT: v_mac_f16
 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
 ; GCN: s_endpgm
-define void @mac_v2f16_neg_b_unsafe_fp_math(
+define void @mac_v2f16_neg_b_nsz_fp_math(
     <2 x half> addrspace(1)* %r,
     <2 x half> addrspace(1)* %a,
     <2 x half> addrspace(1)* %b,
@@ -618,7 +618,7 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}mac_v2f16_neg_c_unsafe_fp_math:
+; GCN-LABEL: {{^}}mac_v2f16_neg_c_nsz_fp_math:
 ; SI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], {{v[0-9]+}}
 ; SI: v_cvt_f32_f16_e32 [[CVT1:v[0-9]+]], {{v[0-9]+}}
 ; SI: v_cvt_f32_f16_e32 [[CVT2:v[0-9]+]], {{v[0-9]+}}
@@ -633,7 +633,7 @@ entry:
 ; VI-NOT: v_mac_f16
 ; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
 ; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
 ; GCN: s_endpgm
-define void @mac_v2f16_neg_c_unsafe_fp_math(
+define void @mac_v2f16_neg_c_nsz_fp_math(
     <2 x half> addrspace(1)* %r,
     <2 x half> addrspace(1)* %a,
     <2 x half> addrspace(1)* %b,
@@ -651,5 +651,5 @@ entry:
   ret void
 }
 
-attributes #0 = { nounwind "unsafe-fp-math"="false" }
-attributes #1 = { nounwind "unsafe-fp-math"="true" }
+attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" }
+attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" }
diff --git a/test/CodeGen/X86/fadd-combines.ll b/test/CodeGen/X86/fadd-combines.ll
index 2df0e06dc25..28f72f42d01 100644
--- a/test/CodeGen/X86/fadd-combines.ll
+++ b/test/CodeGen/X86/fadd-combines.ll
@@ -221,4 +221,4 @@ define <4 x float> @fadd_fadd_x_x_fadd_x_x_4f32(<4 x float> %x) #0 {
   ret <4 x float> %z
 }
 
"less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" } +attributes #0 = { "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "no-signed-zeros-fp-math"="true" } diff --git a/test/CodeGen/X86/vec_unsafe-fp-math.ll b/test/CodeGen/X86/vec_unsafe-fp-math.ll index 827d4184d11..1c352782fca 100644 --- a/test/CodeGen/X86/vec_unsafe-fp-math.ll +++ b/test/CodeGen/X86/vec_unsafe-fp-math.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -enable-unsafe-fp-math -mtriple=x86_64-unknown-unknown -mcpu=corei7 | FileCheck %s +; RUN: llc < %s -enable-unsafe-fp-math -enable-no-signed-zeros-fp-math -mtriple=x86_64-unknown-unknown -mcpu=corei7 | FileCheck %s ; Make sure that vectors get the same benefits as scalars when using unsafe-fp-math.