cl::desc("Enable FP math optimizations that assume no NaNs"),
cl::init(false));
+// Boolean command-line switch, off by default. Its value is copied into
+// Options.NoSignedZerosFPMath when the target options are assembled.
+cl::opt<bool> EnableNoSignedZerosFPMath(
+    "enable-no-signed-zeros-fp-math",
+    cl::desc("Enable FP math optimizations that "
+             "assume the sign of 0 is insignificant"),
+    cl::init(false));
+
cl::opt<bool>
EnableNoTrappingFPMath("enable-no-trapping-fp-math",
cl::desc("Enable setting the FP exceptions build "
Options.UnsafeFPMath = EnableUnsafeFPMath;
Options.NoInfsFPMath = EnableNoInfsFPMath;
Options.NoNaNsFPMath = EnableNoNaNsFPMath;
+ Options.NoSignedZerosFPMath = EnableNoSignedZerosFPMath;
Options.NoTrappingFPMath = EnableNoTrappingFPMath;
Options.FPDenormalMode = DenormalMode;
Options.HonorSignDependentRoundingFPMathOption =
/// assume the FP arithmetic arguments and results are never NaNs.
unsigned NoNaNsFPMath : 1;
- /// NoTrappingFPMath - This flag is enabled when the
- /// -enable-no-trapping-fp-math is specified on the command line. This
+ /// NoTrappingFPMath - This flag is enabled when the
+ /// -enable-no-trapping-fp-math is specified on the command line. This
/// specifies that there are no trap handlers to handle exceptions.
unsigned NoTrappingFPMath : 1;
+ /// NoSignedZerosFPMath - This flag is enabled when the
+ /// -enable-no-signed-zeros-fp-math is specified on the command line. This
+ /// specifies that optimizations are allowed to treat the sign of a zero
+ /// argument or result as insignificant.
+ unsigned NoSignedZerosFPMath : 1;
+
/// HonorSignDependentRoundingFPMath - This returns true when the
/// -enable-sign-dependent-rounding-fp-math is specified. If this returns
/// false (the default), the code generator is allowed to assume that the
--- /dev/null
+; RUN: llc -march=amdgcn -enable-no-signed-zeros-fp-math=0 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SAFE %s
+; RUN: llc -march=amdgcn -enable-no-signed-zeros-fp-math=1 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE %s
+; RUN: llc -march=amdgcn -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE %s
+
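+; Test that -enable-no-signed-zeros-fp-math=1 (and -enable-unsafe-fp-math)
+; lets the fneg of an fsub be emitted without a sign-bit xor, while
+; -enable-no-signed-zeros-fp-math=0 keeps the xor.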
+
+; Loads two adjacent floats, subtracts them, and negates the difference
+; (written as -0.0 - x). The SAFE run expects the negation to remain as an
+; xor of the sign bit; the UNSAFE runs expect no xor at all.
+; GCN-LABEL: {{^}}fneg_fsub_f32:
+; GCN: v_subrev_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
+; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
+
+; GCN-UNSAFE-NOT: xor
+define void @fneg_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %gep = getelementptr float, float addrspace(1)* %in, i32 1
+ %x = load float, float addrspace(1)* %in, align 4
+ %y = load float, float addrspace(1)* %gep, align 4
+ %sub = fsub float %x, %y
+ %fneg = fsub float -0.0, %sub
+ store float %fneg, float addrspace(1)* %out, align 4
+ ret void
+}
+
+attributes #0 = { nounwind }
store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16
ret void
}
+
+; Baseline case with no nsz flag and no function attribute. The negation of
+; the fsub result (written as -0.0 - x) must stay as a sign-bit xor.
+; FUNC-LABEL: {{^}}v_fneg_fsub_f32:
+; SI: v_subrev_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
+define void @v_fneg_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+ %gep = getelementptr float, float addrspace(1)* %in, i32 1
+ %x = load float, float addrspace(1)* %in, align 4
+ %y = load float, float addrspace(1)* %gep, align 4
+ %sub = fsub float %x, %y
+ %fneg = fsub float -0.0, %sub
+ store float %fneg, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; Same computation, but the inner fsub carries the nsz fast-math flag.
+; No xor is expected after the subtract.
+; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_f32:
+; SI: v_subrev_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
+; SI-NOT: xor
+define void @v_fneg_fsub_nsz_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+ %gep = getelementptr float, float addrspace(1)* %in, i32 1
+ %x = load float, float addrspace(1)* %in, align 4
+ %y = load float, float addrspace(1)* %gep, align 4
+ %sub = fsub nsz float %x, %y
+ %fneg = fsub float -0.0, %sub
+ store float %fneg, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; Here no instruction carries nsz; instead the function uses attribute
+; group #0, which sets "no-signed-zeros-fp-math" to "true". No xor is
+; expected after the subtract.
+; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_attribute_f32:
+; SI: v_subrev_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
+; SI-NOT: xor
+define void @v_fneg_fsub_nsz_attribute_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %gep = getelementptr float, float addrspace(1)* %in, i32 1
+ %x = load float, float addrspace(1)* %in, align 4
+ %y = load float, float addrspace(1)* %gep, align 4
+ %sub = fsub float %x, %y
+ %fneg = fsub float -0.0, %sub
+ store float %fneg, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; The attribute's value is the string "true" or "false" rather than a
+; bare flag, so make sure a value other than "true" leaves it disabled
+; and the fneg is not folded.
+; Uses attribute group #1, where "no-signed-zeros-fp-math" is "false";
+; the sign-bit xor is still expected after the subtract.
+; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_false_attribute_f32:
+; SI: v_subrev_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
+define void @v_fneg_fsub_nsz_false_attribute_f32(float addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %gep = getelementptr float, float addrspace(1)* %in, i32 1
+ %x = load float, float addrspace(1)* %in, align 4
+ %y = load float, float addrspace(1)* %gep, align 4
+ %sub = fsub float %x, %y
+ %fneg = fsub float -0.0, %sub
+ store float %fneg, float addrspace(1)* %out, align 4
+ ret void
+}
+
+attributes #0 = { nounwind "no-signed-zeros-fp-math"="true" }
+attributes #1 = { nounwind "no-signed-zeros-fp-math"="false" }