From bcd58e4ff63070590c6c345671a2bed0f2ac4d81 Mon Sep 17 00:00:00 2001 From: Amaury Sechet Date: Fri, 8 Mar 2019 19:39:32 +0000 Subject: [PATCH] [DAGCombiner] fold (add (add (xor a, -1), b), 1) -> (sub b, a) Summary: This pattern is sometimes created after legalization. Reviewers: efriedma, spatel, RKSimon, zvi, bkramer Subscribers: llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D58874 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@355716 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 28 ++++++++++++++++++++---- test/CodeGen/X86/add.ll | 16 +++++--------- test/CodeGen/X86/combine-add.ll | 25 +++++++-------------- 3 files changed, 37 insertions(+), 32 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 30259c170c4..6ac6bb8c206 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2186,10 +2186,30 @@ SDValue DAGCombiner::visitADD(SDNode *N) { DAG.haveNoCommonBitsSet(N0, N1)) return DAG.getNode(ISD::OR, DL, VT, N0, N1); - // fold (add (xor a, -1), 1) -> (sub 0, a) - if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) - return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), - N0.getOperand(0)); + if (isOneOrOneSplat(N1)) { + // fold (add (xor a, -1), 1) -> (sub 0, a) + if (isBitwiseNot(N0)) + return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), + N0.getOperand(0)); + + // fold (add (add (xor a, -1), b), 1) -> (sub b, a) + if (N0.getOpcode() == ISD::ADD || + N0.getOpcode() == ISD::UADDO || + N0.getOpcode() == ISD::SADDO) { + SDValue A, Xor; + + if (isBitwiseNot(N0.getOperand(0))) { + A = N0.getOperand(1); + Xor = N0.getOperand(0); + } else if (isBitwiseNot(N0.getOperand(1))) { + A = N0.getOperand(0); + Xor = N0.getOperand(1); + } + + if (Xor) + return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0)); + } + } if (SDValue Combined = visitADDLike(N0, N1, N)) return 
Combined; diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll index dcb34eda826..72bf912e889 100644 --- a/test/CodeGen/X86/add.ll +++ b/test/CodeGen/X86/add.ll @@ -482,25 +482,19 @@ define i32 @add_to_sub(i32 %a, i32 %b) { ; X32-LABEL: add_to_sub: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: notl %ecx -; X32-NEXT: leal 1(%ecx,%eax), %eax +; X32-NEXT: subl {{[0-9]+}}(%esp), %eax ; X32-NEXT: retl ; ; X64-LINUX-LABEL: add_to_sub: ; X64-LINUX: # %bb.0: -; X64-LINUX-NEXT: # kill: def $esi killed $esi def $rsi -; X64-LINUX-NEXT: # kill: def $edi killed $edi def $rdi -; X64-LINUX-NEXT: notl %edi -; X64-LINUX-NEXT: leal 1(%rdi,%rsi), %eax +; X64-LINUX-NEXT: movl %esi, %eax +; X64-LINUX-NEXT: subl %edi, %eax ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: add_to_sub: ; X64-WIN32: # %bb.0: -; X64-WIN32-NEXT: # kill: def $edx killed $edx def $rdx -; X64-WIN32-NEXT: # kill: def $ecx killed $ecx def $rcx -; X64-WIN32-NEXT: notl %ecx -; X64-WIN32-NEXT: leal 1(%rcx,%rdx), %eax +; X64-WIN32-NEXT: movl %edx, %eax +; X64-WIN32-NEXT: subl %ecx, %eax ; X64-WIN32-NEXT: retq %nota = xor i32 %a, -1 %add = add i32 %nota, %b diff --git a/test/CodeGen/X86/combine-add.ll b/test/CodeGen/X86/combine-add.ll index afa5aa364b8..6f5f1370e6b 100644 --- a/test/CodeGen/X86/combine-add.ll +++ b/test/CodeGen/X86/combine-add.ll @@ -350,22 +350,18 @@ define <4 x i32> @combine_vec_add_sextinreg(<4 x i32> %a0, <4 x i32> %a1) { ret <4 x i32> %3 } -; TODO: (add (add (xor a, -1), b), 1) -> (sub b, a) +; (add (add (xor a, -1), b), 1) -> (sub b, a) define i32 @combine_add_add_not(i32 %a, i32 %b) { ; SSE-LABEL: combine_add_add_not: ; SSE: # %bb.0: -; SSE-NEXT: # kill: def $esi killed $esi def $rsi -; SSE-NEXT: # kill: def $edi killed $edi def $rdi -; SSE-NEXT: notl %edi -; SSE-NEXT: leal 1(%rdi,%rsi), %eax +; SSE-NEXT: movl %esi, %eax +; SSE-NEXT: subl %edi, %eax ; SSE-NEXT: retq ; ; AVX-LABEL: combine_add_add_not: ; AVX: # %bb.0: -; 
AVX-NEXT: # kill: def $esi killed $esi def $rsi -; AVX-NEXT: # kill: def $edi killed $edi def $rdi -; AVX-NEXT: notl %edi -; AVX-NEXT: leal 1(%rdi,%rsi), %eax +; AVX-NEXT: movl %esi, %eax +; AVX-NEXT: subl %edi, %eax ; AVX-NEXT: retq %nota = xor i32 %a, -1 %add = add i32 %nota, %b @@ -376,18 +372,13 @@ define i32 @combine_add_add_not(i32 %a, i32 %b) { define <4 x i32> @combine_vec_add_add_not(<4 x i32> %a, <4 x i32> %b) { ; SSE-LABEL: combine_vec_add_add_not: ; SSE: # %bb.0: -; SSE-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE-NEXT: pxor %xmm2, %xmm0 -; SSE-NEXT: paddd %xmm1, %xmm0 -; SSE-NEXT: psubd %xmm2, %xmm0 +; SSE-NEXT: psubd %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_add_add_not: ; AVX: # %bb.0: -; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpsubd %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpsubd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq %nota = xor <4 x i32> %a, %add = add <4 x i32> %nota, %b -- 2.40.0