From: Sanjay Patel Date: Tue, 14 Mar 2017 18:06:28 +0000 (+0000) Subject: [DAG] vector div/rem with any zero element in divisor is undef X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=bea559baff6bb689615c6c244cad9ebd4bb856a4;p=llvm [DAG] vector div/rem with any zero element in divisor is undef This is the backend counterpart to: https://reviews.llvm.org/rL297390 https://reviews.llvm.org/rL297409 and follow-up to: https://reviews.llvm.org/rL297384 It surprised me that we need to duplicate the check in FoldConstantArithmetic and FoldConstantVectorArithmetic, but one or the other doesn't catch all of the test cases. There is an existing code comment about merging those someday. Differential Revision: https://reviews.llvm.org/D30826 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@297762 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index cd4567cf105..4e75c0f0ef3 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -740,6 +740,9 @@ public: return getNode(ISD::CALLSEQ_END, DL, NodeTys, Ops); } + /// Return true if the result of this operation is always undefined. + bool isUndef(unsigned Opcode, ArrayRef<SDValue> Ops); + /// Return an UNDEF node. UNDEF does not have a useful SDLoc. SDValue getUNDEF(EVT VT) { return getNode(ISD::UNDEF, SDLoc(), VT); diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 30330e4588c..788b52dabe9 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2524,15 +2524,7 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); SDLoc DL(N); - // X / undef -> undef - // X % undef -> undef - if (N1.isUndef()) - return N1; - - // X / 0 --> undef - // X % 0 --> undef - // We don't need to preserve faults! 
- if (isNullConstantOrNullSplatConstant(N1)) + if (DAG.isUndef(N->getOpcode(), {N0, N1})) return DAG.getUNDEF(VT); // undef / X -> 0 diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index d8acd3cfe97..4f33eef7513 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3695,12 +3695,6 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, if (Cst1->isOpaque() || Cst2->isOpaque()) return SDValue(); - // Division/remainder with a zero divisor is undefined behavior. - if ((Opcode == ISD::SDIV || Opcode == ISD::UDIV || - Opcode == ISD::SREM || Opcode == ISD::UREM) && - Cst2->isNullValue()) - return getUNDEF(VT); - std::pair<APInt, bool> Folded = FoldValue(Opcode, Cst1->getAPIntValue(), Cst2->getAPIntValue()); if (!Folded.second) @@ -3728,6 +3722,30 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT, GA->getOffset() + uint64_t(Offset)); } +bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) { + switch (Opcode) { + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: { + // If a divisor is zero/undef or any element of a divisor vector is + // zero/undef, the whole op is undef. + assert(Ops.size() == 2 && "Div/rem should have 2 operands"); + SDValue Divisor = Ops[1]; + if (Divisor.isUndef() || isNullConstant(Divisor)) + return true; + + return ISD::isBuildVectorOfConstantSDNodes(Divisor.getNode()) && + any_of(Divisor->op_values(), + [](SDValue V) { return V.isUndef() || isNullConstant(V); }); + // TODO: Handle signed overflow. + } + // TODO: Handle oversized shifts. 
+ default: + return false; + } +} + SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, SDNode *Cst1, SDNode *Cst2) { @@ -3737,6 +3755,9 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, if (Opcode >= ISD::BUILTIN_OP_END) return SDValue(); + if (isUndef(Opcode, {SDValue(Cst1, 0), SDValue(Cst2, 0)})) + return getUNDEF(VT); + // Handle the case of two scalars. if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) { if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) { @@ -3804,6 +3825,9 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, if (Opcode >= ISD::BUILTIN_OP_END) return SDValue(); + if (isUndef(Opcode, Ops)) + return getUNDEF(VT); + // We can only fold vectors - maybe merge with FoldConstantArithmetic someday? if (!VT.isVector()) return SDValue(); diff --git a/test/CodeGen/X86/div-rem-simplify.ll b/test/CodeGen/X86/div-rem-simplify.ll index c0ce3e57505..04cf439dc15 100644 --- a/test/CodeGen/X86/div-rem-simplify.ll +++ b/test/CodeGen/X86/div-rem-simplify.ll @@ -152,7 +152,6 @@ define <4 x i32> @sel_sdiv0_vec(i1 %cond) { define <4 x i32> @sdiv0elt_vec(<4 x i32> %x) { ; CHECK-LABEL: sdiv0elt_vec: ; CHECK: # BB#0: -; CHECK-NEXT: movaps {{.*#+}} xmm0 = ; CHECK-NEXT: retq %zero = and <4 x i32> %x, %some_ones = or <4 x i32> %zero, @@ -163,7 +162,6 @@ define <4 x i32> @sdiv0elt_vec(<4 x i32> %x) { define <4 x i32> @udiv0elt_vec(<4 x i32> %x) { ; CHECK-LABEL: udiv0elt_vec: ; CHECK: # BB#0: -; CHECK-NEXT: movaps {{.*#+}} xmm0 = ; CHECK-NEXT: retq %div = udiv <4 x i32> , ret <4 x i32> %div @@ -172,7 +170,6 @@ define <4 x i32> @udiv0elt_vec(<4 x i32> %x) { define <4 x i32> @urem0elt_vec(<4 x i32> %x) { ; CHECK-LABEL: urem0elt_vec: ; CHECK: # BB#0: -; CHECK-NEXT: movaps {{.*#+}} xmm0 = ; CHECK-NEXT: retq %zero = and <4 x i32> %x, %some_ones = or <4 x i32> %zero, @@ -183,8 +180,6 @@ define <4 x i32> @urem0elt_vec(<4 x i32> %x) { define <4 x i32> @srem0elt_vec(<4 x i32> %x) { ; CHECK-LABEL: 
srem0elt_vec: ; CHECK: # BB#0: -; CHECK-NEXT: movl $-2, %eax -; CHECK-NEXT: movd %eax, %xmm0 ; CHECK-NEXT: retq %rem = srem <4 x i32> , ret <4 x i32> %rem diff --git a/test/CodeGen/X86/vec_sdiv_to_shift.ll b/test/CodeGen/X86/vec_sdiv_to_shift.ll index 01a91936e56..f0c9069d8c7 100644 --- a/test/CodeGen/X86/vec_sdiv_to_shift.ll +++ b/test/CodeGen/X86/vec_sdiv_to_shift.ll @@ -184,27 +184,15 @@ entry: ret <16 x i16> %a0 } -; TODO: The div-by-0 lanes are folded away, so we use scalar ops. Would it be better to keep this in the vector unit? +; Div-by-0 in any lane is UB. define <4 x i32> @sdiv_non_splat(<4 x i32> %x) { ; SSE-LABEL: sdiv_non_splat: ; SSE: # BB#0: -; SSE-NEXT: movd %xmm0, %eax -; SSE-NEXT: movl %eax, %ecx -; SSE-NEXT: shrl $31, %ecx -; SSE-NEXT: addl %eax, %ecx -; SSE-NEXT: sarl %ecx -; SSE-NEXT: movd %ecx, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: sdiv_non_splat: ; AVX: # BB#0: -; AVX-NEXT: vmovd %xmm0, %eax -; AVX-NEXT: movl %eax, %ecx -; AVX-NEXT: shrl $31, %ecx -; AVX-NEXT: addl %eax, %ecx -; AVX-NEXT: sarl %ecx -; AVX-NEXT: vmovd %ecx, %xmm0 ; AVX-NEXT: retq %y = sdiv <4 x i32> %x, ret <4 x i32> %y