[TargetLowering] fix SETCC SETLT folding with FP types

author Sanjay Patel <spatel@rotateright.com>

Sun, 12 Feb 2017 23:07:52 +0000 (23:07 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Sun, 12 Feb 2017 23:07:52 +0000 (23:07 +0000)
author Sanjay Patel <spatel@rotateright.com>
Sun, 12 Feb 2017 23:07:52 +0000 (23:07 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Sun, 12 Feb 2017 23:07:52 +0000 (23:07 +0000)
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp

index 3db7c8202d644eeab2520440d62e2622e02479dc..3785f402c3e8f5ef001e17374bd15d1e1354fde3 100644 (file)
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -751,25 +751,29 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
      KnownOne &= KnownOne2;
      KnownZero &= KnownZero2;
      break;
-  case ISD::SETCC:
+  case ISD::SETCC: {
+    SDValue Op0 = Op.getOperand(0);
+    SDValue Op1 = Op.getOperand(1);
+    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
      // If (1) we only need the sign-bit, (2) the setcc operands are the same
      // width as the setcc result, and (3) the result of a setcc conforms to 0 or
      // -1, we may be able to bypass the setcc.
-    if (NewMask.isSignBit() &&
-        Op.getOperand(0).getScalarValueSizeInBits() == BitWidth &&
+    if (NewMask.isSignBit() && Op0.getScalarValueSizeInBits() == BitWidth &&
          getBooleanContents(Op.getValueType()) ==
              BooleanContent::ZeroOrNegativeOneBooleanContent) {
-      ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
-      // If we're testing if X < 0, then this compare isn't needed - just use X!
-      if (CC == ISD::SETLT &&
-          (isNullConstant(Op.getOperand(1)) ||
-           ISD::isBuildVectorAllZeros(Op.getOperand(1).getNode())))
-        return TLO.CombineTo(Op, Op.getOperand(0));
+      // If we're testing X < 0, then this compare isn't needed - just use X!
+      // FIXME: We're limiting to integer types here, but this should also work
+      // if we don't care about FP signed-zero. The use of SETLT with FP means
+      // that we don't care about NaNs.
+      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
+          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
+        return TLO.CombineTo(Op, Op0);
  
        // TODO: Should we check for other forms of sign-bit comparisons?
        // Examples: X <= -1, X >= 0
      }
      break;
+  }
    case ISD::SHL:
      if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
        unsigned ShAmt = SA->getZExtValue();
diff --git a/test/CodeGen/X86/vselect-pcmp.ll b/test/CodeGen/X86/vselect-pcmp.ll

index 3958c414e9a887c6b31e6c935ee22b3ae80008d4..667bcde6b462815ba96da3eea868b81c38e7ed03 100644 (file)
--- a/test/CodeGen/X86/vselect-pcmp.ll
+++ b/test/CodeGen/X86/vselect-pcmp.ll
@@ -317,3 +317,27 @@ define <8 x double> @signbit_sel_v8f64(<8 x double> %x, <8 x double> %y, <8 x i6
    ret <8 x double> %z
  }
  
+; If we have a floating-point compare:
+; (1) Don't die.
+; (2) FIXME: If we don't care about signed-zero (and NaN?), the compare should still get folded.
+
+define <4 x float> @signbit_sel_v4f32_fcmp(<4 x float> %x, <4 x float> %y, <4 x float> %mask) #0 {
+; AVX12F-LABEL: signbit_sel_v4f32_fcmp:
+; AVX12F:       # BB#0:
+; AVX12F-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; AVX12F-NEXT:    vcmpltps %xmm2, %xmm0, %xmm2
+; AVX12F-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; AVX12F-NEXT:    retq
+;
+; AVX512VL-LABEL: signbit_sel_v4f32_fcmp:
+; AVX512VL:       # BB#0:
+; AVX512VL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512VL-NEXT:    vcmpltps %xmm2, %xmm0, %k1
+; AVX512VL-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
+; AVX512VL-NEXT:    retq
+  %cmp = fcmp olt <4 x float> %x, zeroinitializer
+  %sel = select <4 x i1> %cmp, <4 x float> %x, <4 x float> %y
+  ret <4 x float> %sel
+}
+
+attributes #0 = { "no-nans-fp-math"="true" }
author	Sanjay Patel <spatel@rotateright.com>
	Sun, 12 Feb 2017 23:07:52 +0000 (23:07 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Sun, 12 Feb 2017 23:07:52 +0000 (23:07 +0000)
lib/CodeGen/SelectionDAG/TargetLowering.cpp		patch | blob | history
test/CodeGen/X86/vselect-pcmp.ll		patch | blob | history