[DAG] vector div/rem with any zero element in divisor is undef

author Sanjay Patel <spatel@rotateright.com>

Tue, 14 Mar 2017 18:06:28 +0000 (18:06 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Tue, 14 Mar 2017 18:06:28 +0000 (18:06 +0000)
author Sanjay Patel <spatel@rotateright.com>
Tue, 14 Mar 2017 18:06:28 +0000 (18:06 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Tue, 14 Mar 2017 18:06:28 +0000 (18:06 +0000)
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h

index cd4567cf105db786d48dcad48a77656d3f5c7a5b..4e75c0f0ef38d23e2d070571a3ba606b75cbea83 100644 (file)
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -740,6 +740,9 @@ public:
      return getNode(ISD::CALLSEQ_END, DL, NodeTys, Ops);
    }
  
+  /// Return true if the result of this operation is always undefined.
+  bool isUndef(unsigned Opcode, ArrayRef<SDValue> Ops);
+
    /// Return an UNDEF node. UNDEF does not have a useful SDLoc.
    SDValue getUNDEF(EVT VT) {
      return getNode(ISD::UNDEF, SDLoc(), VT);
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 30330e4588c3ecee7ec00a420b02cef06bedb723..788b52dabe93d2f68ee2cb35d3da298effe30d94 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2524,15 +2524,7 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
    EVT VT = N->getValueType(0);
    SDLoc DL(N);
  
-  // X / undef -> undef
-  // X % undef -> undef
-  if (N1.isUndef())
-    return N1;
-
-  // X / 0 --> undef
-  // X % 0 --> undef
-  // We don't need to preserve faults!
-  if (isNullConstantOrNullSplatConstant(N1))
+  if (DAG.isUndef(N->getOpcode(), {N0, N1}))
      return DAG.getUNDEF(VT);
  
    // undef / X -> 0
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

index d8acd3cfe9764665aedfa92fca9becad0f473ee2..4f33eef7513e0da0700ea69f701cc991ebb1d427 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3695,12 +3695,6 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
    if (Cst1->isOpaque() || Cst2->isOpaque())
      return SDValue();
  
-  // Division/remainder with a zero divisor is undefined behavior.
-  if ((Opcode == ISD::SDIV || Opcode == ISD::UDIV ||
-       Opcode == ISD::SREM || Opcode == ISD::UREM) &&
-      Cst2->isNullValue())
-    return getUNDEF(VT);
-
    std::pair<APInt, bool> Folded = FoldValue(Opcode, Cst1->getAPIntValue(),
                                              Cst2->getAPIntValue());
    if (!Folded.second)
@@ -3728,6 +3722,30 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
                            GA->getOffset() + uint64_t(Offset));
  }
  
+bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {
+  switch (Opcode) {
+  case ISD::SDIV:
+  case ISD::UDIV:
+  case ISD::SREM:
+  case ISD::UREM: {
+    // Div/rem is undef when the divisor is undef or a zero constant; for a
+    // constant build vector, one zero/undef element makes the whole op undef.
+    assert(Ops.size() == 2 && "Div/rem should have 2 operands");
+    SDValue Divisor = Ops[1];
+    if (Divisor.isUndef() || isNullConstant(Divisor))
+      return true;
+
+    return ISD::isBuildVectorOfConstantSDNodes(Divisor.getNode()) &&
+           any_of(Divisor->op_values(),
+                  [](SDValue V) { return V.isUndef() || isNullConstant(V); });
+    // TODO: Handle signed overflow (not detected by the checks above).
+  }
+  // TODO: Handle oversized shifts; they currently fall through to 'default'.
+  default:
+    return false;
+  }
+}
+
  SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
                                               EVT VT, SDNode *Cst1,
                                               SDNode *Cst2) {
@@ -3737,6 +3755,9 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
    if (Opcode >= ISD::BUILTIN_OP_END)
      return SDValue();
  
+  if (isUndef(Opcode, {SDValue(Cst1, 0), SDValue(Cst2, 0)}))
+    return getUNDEF(VT);
+
    // Handle the case of two scalars.
    if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) {
      if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) {
@@ -3804,6 +3825,9 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
    if (Opcode >= ISD::BUILTIN_OP_END)
      return SDValue();
  
+  if (isUndef(Opcode, Ops))
+    return getUNDEF(VT);
+
    // We can only fold vectors - maybe merge with FoldConstantArithmetic someday?
    if (!VT.isVector())
      return SDValue();
diff --git a/test/CodeGen/X86/div-rem-simplify.ll b/test/CodeGen/X86/div-rem-simplify.ll

index c0ce3e57505e4c511aecd298e4a2e61adddc44d6..04cf439dc1555d4d90c9a1616d5fe8d0a8cbf0d2 100644 (file)
--- a/test/CodeGen/X86/div-rem-simplify.ll
+++ b/test/CodeGen/X86/div-rem-simplify.ll
@@ -152,7 +152,6 @@ define <4 x i32> @sel_sdiv0_vec(i1 %cond) {
  define <4 x i32> @sdiv0elt_vec(<4 x i32> %x) {
  ; CHECK-LABEL: sdiv0elt_vec:
  ; CHECK:       # BB#0:
-; CHECK-NEXT:    movaps {{.*#+}} xmm0 = <u,12,u,4294967292>
  ; CHECK-NEXT:    retq
    %zero = and <4 x i32> %x, <i32 0, i32 0, i32 0, i32 0>
    %some_ones = or <4 x i32> %zero, <i32 0, i32 -1, i32 0, i32 3>
@@ -163,7 +162,6 @@ define <4 x i32> @sdiv0elt_vec(<4 x i32> %x) {
  define <4 x i32> @udiv0elt_vec(<4 x i32> %x) {
  ; CHECK-LABEL: udiv0elt_vec:
  ; CHECK:       # BB#0:
-; CHECK-NEXT:    movaps {{.*#+}} xmm0 = <u,4,3,u>
  ; CHECK-NEXT:    retq
    %div = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, <i32 0, i32 3, i32 4, i32 0>
    ret <4 x i32> %div
@@ -172,7 +170,6 @@ define <4 x i32> @udiv0elt_vec(<4 x i32> %x) {
  define <4 x i32> @urem0elt_vec(<4 x i32> %x) {
  ; CHECK-LABEL: urem0elt_vec:
  ; CHECK:       # BB#0:
-; CHECK-NEXT:    movaps {{.*#+}} xmm0 = <u,u,u,2>
  ; CHECK-NEXT:    retq
    %zero = and <4 x i32> %x, <i32 0, i32 0, i32 0, i32 0>
    %some_ones = or <4 x i32> %zero, <i32 0, i32 0, i32 0, i32 3>
@@ -183,8 +180,6 @@ define <4 x i32> @urem0elt_vec(<4 x i32> %x) {
  define <4 x i32> @srem0elt_vec(<4 x i32> %x) {
  ; CHECK-LABEL: srem0elt_vec:
  ; CHECK:       # BB#0:
-; CHECK-NEXT:    movl $-2, %eax
-; CHECK-NEXT:    movd %eax, %xmm0
  ; CHECK-NEXT:    retq
    %rem = srem <4 x i32> <i32 -11, i32 -12, i32 -13, i32 -14>, <i32 -3, i32 -3, i32 0, i32 2>
    ret <4 x i32> %rem
diff --git a/test/CodeGen/X86/vec_sdiv_to_shift.ll b/test/CodeGen/X86/vec_sdiv_to_shift.ll

index 01a91936e5688a22dbd08942474971fdcc0914a4..f0c9069d8c797b365365e222916f191a2b145336 100644 (file)
--- a/test/CodeGen/X86/vec_sdiv_to_shift.ll
+++ b/test/CodeGen/X86/vec_sdiv_to_shift.ll
@@ -184,27 +184,15 @@ entry:
    ret <16 x i16> %a0
  }
  
-; TODO: The div-by-0 lanes are folded away, so we use scalar ops. Would it be better to keep this in the vector unit?
+; Div-by-0 in any lane is UB.
  
  define <4 x i32> @sdiv_non_splat(<4 x i32> %x) {
  ; SSE-LABEL: sdiv_non_splat:
  ; SSE:       # BB#0:
-; SSE-NEXT:    movd %xmm0, %eax
-; SSE-NEXT:    movl %eax, %ecx
-; SSE-NEXT:    shrl $31, %ecx
-; SSE-NEXT:    addl %eax, %ecx
-; SSE-NEXT:    sarl %ecx
-; SSE-NEXT:    movd %ecx, %xmm0
  ; SSE-NEXT:    retq
  ;
  ; AVX-LABEL: sdiv_non_splat:
  ; AVX:       # BB#0:
-; AVX-NEXT:    vmovd %xmm0, %eax
-; AVX-NEXT:    movl %eax, %ecx
-; AVX-NEXT:    shrl $31, %ecx
-; AVX-NEXT:    addl %eax, %ecx
-; AVX-NEXT:    sarl %ecx
-; AVX-NEXT:    vmovd %ecx, %xmm0
  ; AVX-NEXT:    retq
    %y = sdiv <4 x i32> %x, <i32 2, i32 0, i32 0, i32 0>
    ret <4 x i32> %y
author	Sanjay Patel <spatel@rotateright.com>
	Tue, 14 Mar 2017 18:06:28 +0000 (18:06 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Tue, 14 Mar 2017 18:06:28 +0000 (18:06 +0000)
include/llvm/CodeGen/SelectionDAG.h		patch \| blob \| history
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/SelectionDAG.cpp		patch \| blob \| history
test/CodeGen/X86/div-rem-simplify.ll		patch \| blob \| history
test/CodeGen/X86/vec_sdiv_to_shift.ll		patch \| blob \| history