From 771a901cd41673cc86500c479007fa07acdfc8a1 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 12 Feb 2017 23:07:52 +0000 Subject: [PATCH] [TargetLowering] fix SETCC SETLT folding with FP types The bug was introduced with: https://reviews.llvm.org/rL294863 ...and manifests as a selection failure in x86, but that's actually another bug. This fix prevents wrong codegen with -0.0, but in the more common case when we have NSZ and NNAN (-ffast-math), we should still be able to fold this setcc/compare. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294924 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 22 +++++++++++-------- test/CodeGen/X86/vselect-pcmp.ll | 24 +++++++++++++++++++++ 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 3db7c8202d6..3785f402c3e 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -751,25 +751,29 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownOne &= KnownOne2; KnownZero &= KnownZero2; break; - case ISD::SETCC: + case ISD::SETCC: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); // If (1) we only need the sign-bit, (2) the setcc operands are the same // width as the setcc result, and (3) the result of a setcc conforms to 0 or // -1, we may be able to bypass the setcc. - if (NewMask.isSignBit() && - Op.getOperand(0).getScalarValueSizeInBits() == BitWidth && + if (NewMask.isSignBit() && Op0.getScalarValueSizeInBits() == BitWidth && getBooleanContents(Op.getValueType()) == BooleanContent::ZeroOrNegativeOneBooleanContent) { - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); - // If we're testing if X < 0, then this compare isn't needed - just use X! 
- if (CC == ISD::SETLT && - (isNullConstant(Op.getOperand(1)) || - ISD::isBuildVectorAllZeros(Op.getOperand(1).getNode()))) - return TLO.CombineTo(Op, Op.getOperand(0)); + // If we're testing X < 0, then this compare isn't needed - just use X! + // FIXME: We're limiting to integer types here, but this should also work + // if we don't care about FP signed-zero. The use of SETLT with FP means + // that we don't care about NaNs. + if (CC == ISD::SETLT && Op1.getValueType().isInteger() && + (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode()))) + return TLO.CombineTo(Op, Op0); // TODO: Should we check for other forms of sign-bit comparisons? // Examples: X <= -1, X >= 0 } break; + } case ISD::SHL: if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { unsigned ShAmt = SA->getZExtValue(); diff --git a/test/CodeGen/X86/vselect-pcmp.ll b/test/CodeGen/X86/vselect-pcmp.ll index 3958c414e9a..667bcde6b46 100644 --- a/test/CodeGen/X86/vselect-pcmp.ll +++ b/test/CodeGen/X86/vselect-pcmp.ll @@ -317,3 +317,27 @@ define <8 x double> @signbit_sel_v8f64(<8 x double> %x, <8 x double> %y, <8 x i6 ret <8 x double> %z } +; If we have a floating-point compare: +; (1) Don't die. +; (2) FIXME: If we don't care about signed-zero (and NaN?), the compare should still get folded. 
+ +define <4 x float> @signbit_sel_v4f32_fcmp(<4 x float> %x, <4 x float> %y, <4 x float> %mask) #0 { +; AVX12F-LABEL: signbit_sel_v4f32_fcmp: +; AVX12F: # BB#0: +; AVX12F-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX12F-NEXT: vcmpltps %xmm2, %xmm0, %xmm2 +; AVX12F-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; AVX12F-NEXT: retq +; +; AVX512VL-LABEL: signbit_sel_v4f32_fcmp: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512VL-NEXT: vcmpltps %xmm2, %xmm0, %k1 +; AVX512VL-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} +; AVX512VL-NEXT: retq + %cmp = fcmp olt <4 x float> %x, zeroinitializer + %sel = select <4 x i1> %cmp, <4 x float> %x, <4 x float> %y + ret <4 x float> %sel +} + +attributes #0 = { "no-nans-fp-math"="true" } -- 2.50.1