From 53c102ac20116062f637a701b3ab8ffb116b5efe Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 6 Mar 2019 16:11:03 +0000
Subject: [PATCH] [DAGCombiner] Enable UADDO/USUBO vector combine support

Differential Revision: https://reviews.llvm.org/D58965

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@355517 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 19 ++++++-------
 test/CodeGen/X86/combine-addo.ll         | 33 ++++++++--------------
 test/CodeGen/X86/combine-subo.ll         | 36 ++++++--------------------
 3 files changed, 26 insertions(+), 62 deletions(-)

diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a839073e63d..2e4c5933b9e 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2443,8 +2443,6 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   EVT VT = N0.getValueType();
-  if (VT.isVector())
-    return SDValue();
 
   EVT CarryVT = N->getValueType(1);
   SDLoc DL(N);
@@ -2455,13 +2453,12 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) {
                        DAG.getUNDEF(CarryVT));
 
   // canonicalize constant to RHS.
-  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
-  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
-  if (N0C && !N1C)
+  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
     return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
 
   // fold (uaddo x, 0) -> x + no carry out
-  if (isNullConstant(N1))
+  if (isNullOrNullSplat(N1))
     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
 
   // If it cannot overflow, transform into an add.
@@ -2488,7 +2485,9 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) {
 }
 
 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
-  auto VT = N0.getValueType();
+  EVT VT = N0.getValueType();
+  if (VT.isVector())
+    return SDValue();
 
   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
   // If Y + 1 cannot overflow.
@@ -2952,8 +2951,6 @@ SDValue DAGCombiner::visitUSUBO(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   EVT VT = N0.getValueType();
-  if (VT.isVector())
-    return SDValue();
 
   EVT CarryVT = N->getValueType(1);
   SDLoc DL(N);
@@ -2969,11 +2966,11 @@ SDValue DAGCombiner::visitUSUBO(SDNode *N) {
                      DAG.getConstant(0, DL, CarryVT));
 
   // fold (usubo x, 0) -> x + no borrow
-  if (isNullConstant(N1))
+  if (isNullOrNullSplat(N1))
     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
 
   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
-  if (isAllOnesConstant(N0))
+  if (isAllOnesOrAllOnesSplat(N0))
     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
                      DAG.getConstant(0, DL, CarryVT));
 
diff --git a/test/CodeGen/X86/combine-addo.ll b/test/CodeGen/X86/combine-addo.ll
index 23e5366f5e9..e93254e052a 100644
--- a/test/CodeGen/X86/combine-addo.ll
+++ b/test/CodeGen/X86/combine-addo.ll
@@ -62,18 +62,10 @@ define i32 @combine_uadd_zero(i32 %a0, i32 %a1) {
 define <4 x i32> @combine_vec_uadd_zero(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-LABEL: combine_vec_uadd_zero:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movdqa %xmm0, %xmm2
-; SSE-NEXT:    pmaxud %xmm0, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
-; SSE-NEXT:    blendvps %xmm0, %xmm2, %xmm1
-; SSE-NEXT:    movaps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_uadd_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpmaxud %xmm0, %xmm0, %xmm2
-; AVX-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm2
-; AVX-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
   %1 = call {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> zeroinitializer)
   %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
@@ -108,24 +100,23 @@ define i32 @combine_uadd_not(i32 %a0, i32 %a1) {
 define <4 x i32> @combine_vec_uadd_not(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-LABEL: combine_vec_uadd_not:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
-; SSE-NEXT:    pxor %xmm2, %xmm0
-; SSE-NEXT:    movdqa %xmm0, %xmm3
-; SSE-NEXT:    psubd %xmm2, %xmm3
-; SSE-NEXT:    pmaxud %xmm3, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm3, %xmm0
-; SSE-NEXT:    blendvps %xmm0, %xmm3, %xmm1
+; SSE-NEXT:    pxor %xmm2, %xmm2
+; SSE-NEXT:    psubd %xmm0, %xmm2
+; SSE-NEXT:    movdqa {{.*#+}} xmm0 = [1,1,1,1]
+; SSE-NEXT:    pmaxud %xmm2, %xmm0
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
+; SSE-NEXT:    blendvps %xmm0, %xmm2, %xmm1
 ; SSE-NEXT:    movaps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_uadd_not:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpsubd %xmm2, %xmm0, %xmm2
-; AVX-NEXT:    vpmaxud %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT:    vpsubd %xmm0, %xmm2, %xmm0
+; AVX-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
+; AVX-NEXT:    vpmaxud %xmm2, %xmm0, %xmm2
+; AVX-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm2
+; AVX-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
   %1 = xor <4 x i32> %a0, <i32 -1, i32 -1, i32 -1, i32 -1>
   %2 = call {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
diff --git a/test/CodeGen/X86/combine-subo.ll b/test/CodeGen/X86/combine-subo.ll
index c162515c257..5113c95f920 100644
--- a/test/CodeGen/X86/combine-subo.ll
+++ b/test/CodeGen/X86/combine-subo.ll
@@ -62,18 +62,10 @@ define i32 @combine_usub_zero(i32 %a0, i32 %a1) {
 define <4 x i32> @combine_vec_usub_zero(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-LABEL: combine_vec_usub_zero:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movdqa %xmm0, %xmm2
-; SSE-NEXT:    pminud %xmm0, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
-; SSE-NEXT:    blendvps %xmm0, %xmm2, %xmm1
-; SSE-NEXT:    movaps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_usub_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpminud %xmm0, %xmm0, %xmm2
-; AVX-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm2
-; AVX-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
   %1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> zeroinitializer)
   %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
@@ -138,20 +130,12 @@ define i32 @combine_usub_self(i32 %a0, i32 %a1) {
 define <4 x i32> @combine_vec_usub_self(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-LABEL: combine_vec_usub_self:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movdqa %xmm0, %xmm2
-; SSE-NEXT:    psubd %xmm0, %xmm2
-; SSE-NEXT:    pminud %xmm2, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
-; SSE-NEXT:    blendvps %xmm0, %xmm2, %xmm1
-; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    xorps %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_usub_self:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpsubd %xmm0, %xmm0, %xmm2
-; AVX-NEXT:    vpminud %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> %a0)
   %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
@@ -183,22 +167,14 @@ define i32 @combine_usub_negone(i32 %a0, i32 %a1) {
 define <4 x i32> @combine_vec_usub_negone(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-LABEL: combine_vec_usub_negone:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movdqa %xmm0, %xmm2
-; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
-; SSE-NEXT:    pxor %xmm0, %xmm2
-; SSE-NEXT:    pminud %xmm2, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
-; SSE-NEXT:    blendvps %xmm0, %xmm2, %xmm1
-; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
+; SSE-NEXT:    pxor %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_usub_negone:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpminud %xmm2, %xmm0, %xmm2
-; AVX-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm2
-; AVX-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a0)
   %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
-- 
2.40.0
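
For readers skimming the diff without the full test files, here is a minimal standalone sketch of the vector fold this change enables; the function name below is illustrative and not part of the patch. With the vector bail-out removed and isNullOrNullSplat() used for the zero check, the uadd.with.overflow call folds to its first operand with a zero carry mask, so x86 codegen for this function reduces to a plain return of %x (compare combine_vec_uadd_zero above):

define <4 x i32> @uaddo_vec_zero_example(<4 x i32> %x) {
  ; (uaddo x, 0) -> x + no carry out, now applied to zero splats as well
  %r = call {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32> %x, <4 x i32> zeroinitializer)
  %v = extractvalue {<4 x i32>, <4 x i1>} %r, 0
  ret <4 x i32> %v
}
declare {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32>, <4 x i32>)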