From 58c76e2b4efdebc1a5b767c64f6f7e568c17a42b Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Thu, 30 May 2019 20:37:18 +0000 Subject: [PATCH] [DAGCombiner][X86][AArch64][SPARC][SystemZ] y - (x + C) -> (y - x) - C fold. Try 3 Summary: Direct sibling of D62223 patch. While I don't have a direct motivating pattern for this, it would seem to make sense to handle both patterns (or none), for symmetry? The aarch64 changes look neutral; sparc and systemz look like an improvement (one less instruction each); x86 changes - 32-bit case improves, 64-bit case shows that LEA no longer gets constructed, which may be because that whole test is `-mattr=+slow-lea,+slow-3ops-lea`. https://rise4fun.com/Alive/ffh This is a recommit, originally committed in rL361852, but reverted to investigate test-suite compile-time hangs, and then reverted in rL362109 to fix missing constant folds that were causing endless combine loops. Reviewers: RKSimon, craig.topper, spatel, t.p.northover Reviewed By: t.p.northover Subscribers: t.p.northover, jyknight, javed.absar, kristof.beyls, fedor.sergeev, jrtc27, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62252 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362143 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 +++++ test/CodeGen/AArch64/shift-amount-mod.ll | 4 +-- test/CodeGen/AArch64/sink-addsub-of-const.ll | 12 ++++----- test/CodeGen/SPARC/2013-05-17-CallFrame.ll | 5 ++-- test/CodeGen/SystemZ/alloca-03.ll | 11 ++++---- test/CodeGen/X86/sink-addsub-of-const.ll | 28 ++++++++++---------- 6 files changed, 35 insertions(+), 31 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index dc4f7f2d590..10ae53a723d 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2970,6 +2970,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, 
N0.getOperand(0), N1); return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1)); } + // y - (x + C) -> (y - x) - C + if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD && + isConstantOrConstantVector(N1.getOperand(1))) { + SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0)); + return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1)); + } // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1' // rather than 'sub 0/1' (the sext should get folded). diff --git a/test/CodeGen/AArch64/shift-amount-mod.ll b/test/CodeGen/AArch64/shift-amount-mod.ll index d349eb09f73..c91700436bb 100644 --- a/test/CodeGen/AArch64/shift-amount-mod.ll +++ b/test/CodeGen/AArch64/shift-amount-mod.ll @@ -513,7 +513,7 @@ define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounw define i32 @reg32_lshr_by_b_sub_negated_unfolded(i32 %val, i32 %a, i32 %b) nounwind { ; CHECK-LABEL: reg32_lshr_by_b_sub_negated_unfolded: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w1, w2 +; CHECK-NEXT: add w8, w2, w1 ; CHECK-NEXT: lsr w0, w0, w8 ; CHECK-NEXT: ret %nega = sub i32 0, %a @@ -525,7 +525,7 @@ define i32 @reg32_lshr_by_b_sub_negated_unfolded(i32 %val, i32 %a, i32 %b) nounw define i64 @reg64_lshr_by_b_sub_negated_unfolded(i64 %val, i64 %a, i64 %b) nounwind { ; CHECK-LABEL: reg64_lshr_by_b_sub_negated_unfolded: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x1, x2 +; CHECK-NEXT: add x8, x2, x1 ; CHECK-NEXT: lsr x0, x0, x8 ; CHECK-NEXT: ret %nega = sub i64 0, %a diff --git a/test/CodeGen/AArch64/sink-addsub-of-const.ll b/test/CodeGen/AArch64/sink-addsub-of-const.ll index 8886954623f..167ca6a10ec 100644 --- a/test/CodeGen/AArch64/sink-addsub-of-const.ll +++ b/test/CodeGen/AArch64/sink-addsub-of-const.ll @@ -107,9 +107,9 @@ define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b, i32 %c) { define i32 @sink_add_of_const_to_sub2(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: sink_add_of_const_to_sub2: ; CHECK: // %bb.0: -; CHECK-NEXT: sub w8, w0, w1 -; CHECK-NEXT: add 
w8, w8, #32 // =32 -; CHECK-NEXT: sub w0, w2, w8 +; CHECK-NEXT: sub w8, w1, w0 +; CHECK-NEXT: add w8, w2, w8 +; CHECK-NEXT: sub w0, w8, #32 // =32 ; CHECK-NEXT: ret %t0 = sub i32 %a, %b %t1 = add i32 %t0, 32 ; constant always on RHS @@ -295,9 +295,9 @@ define <4 x i32> @vec_sink_add_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4 x ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI19_0 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI19_0] -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-NEXT: add v0.4s, v0.4s, v3.4s -; CHECK-NEXT: sub v0.4s, v2.4s, v0.4s +; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s +; CHECK-NEXT: add v0.4s, v2.4s, v0.4s +; CHECK-NEXT: sub v0.4s, v0.4s, v3.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> %a, %b %t1 = add <4 x i32> %t0, ; constant always on RHS diff --git a/test/CodeGen/SPARC/2013-05-17-CallFrame.ll b/test/CodeGen/SPARC/2013-05-17-CallFrame.ll index 1a97e4e317e..274e99b114c 100644 --- a/test/CodeGen/SPARC/2013-05-17-CallFrame.ll +++ b/test/CodeGen/SPARC/2013-05-17-CallFrame.ll @@ -15,10 +15,9 @@ define void @variable_alloca_with_adj_call_stack(i32 %num) { ; V8-NEXT: .cfi_register 15, 31 ; V8-NEXT: add %i0, 7, %i0 ; V8-NEXT: and %i0, -8, %i0 -; V8-NEXT: add %i0, 8, %i0 ; V8-NEXT: sub %sp, %i0, %i0 -; V8-NEXT: add %i0, 96, %o0 -; V8-NEXT: mov %i0, %sp +; V8-NEXT: add %i0, -8, %sp +; V8-NEXT: add %i0, 88, %o0 ; V8-NEXT: add %sp, -16, %sp ; V8-NEXT: st %o0, [%sp+104] ; V8-NEXT: st %o0, [%sp+100] diff --git a/test/CodeGen/SystemZ/alloca-03.ll b/test/CodeGen/SystemZ/alloca-03.ll index 343071211b7..cac569ff41f 100644 --- a/test/CodeGen/SystemZ/alloca-03.ll +++ b/test/CodeGen/SystemZ/alloca-03.ll @@ -75,13 +75,12 @@ define void @f3(i64 %len) { ; CHECK-NEXT: lgr %r11, %r15 ; CHECK-NEXT: .cfi_def_cfa_register %r11 ; CHECK-NEXT: lgr %r1, %r15 -; CHECK-NEXT: sllg %r2, %r2, 3 -; CHECK-NEXT: la %r0, 120(%r2) +; CHECK-NEXT: sllg %r0, %r2, 3 ; CHECK-NEXT: sgr %r1, %r0 -; CHECK-NEXT: la %r2, 280(%r1) -; CHECK-NEXT: nill %r2, 65408 -; CHECK-NEXT: lgr %r15, %r1 -; CHECK-NEXT: 
mvghi 0(%r2), 10 +; CHECK-NEXT: lay %r15, -120(%r1) +; CHECK-NEXT: la %r1, 160(%r1) +; CHECK-NEXT: nill %r1, 65408 +; CHECK-NEXT: mvghi 0(%r1), 10 ; CHECK-NEXT: lmg %r11, %r15, 248(%r11) ; CHECK-NEXT: br %r14 %x = alloca i64, i64 %len, align 128 diff --git a/test/CodeGen/X86/sink-addsub-of-const.ll b/test/CodeGen/X86/sink-addsub-of-const.ll index 37a3dcbd0e4..59a42ad9e39 100644 --- a/test/CodeGen/X86/sink-addsub-of-const.ll +++ b/test/CodeGen/X86/sink-addsub-of-const.ll @@ -178,16 +178,16 @@ define i32 @sink_add_of_const_to_sub2(i32 %a, i32 %b, i32 %c) { ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: addl $32, %ecx -; X32-NEXT: subl %ecx, %eax +; X32-NEXT: addl %ecx, %eax +; X32-NEXT: addl $-32, %eax ; X32-NEXT: retl ; ; X64-LABEL: sink_add_of_const_to_sub2: ; X64: # %bb.0: -; X64-NEXT: movl %edx, %eax -; X64-NEXT: subl %esi, %edi -; X64-NEXT: addl $32, %edi -; X64-NEXT: subl %edi, %eax +; X64-NEXT: # kill: def $edx killed $edx def $rdx +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: subl %edi, %esi +; X64-NEXT: leal -32(%rdx,%rsi), %eax ; X64-NEXT: retq %t0 = sub i32 %a, %b %t1 = add i32 %t0, 32 ; constant always on RHS @@ -434,18 +434,18 @@ define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x define <4 x i32> @vec_sink_add_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; X32-LABEL: vec_sink_add_of_const_to_sub2: ; X32: # %bb.0: -; X32-NEXT: psubd %xmm1, %xmm0 -; X32-NEXT: paddd {{\.LCPI.*}}, %xmm0 -; X32-NEXT: psubd %xmm0, %xmm2 -; X32-NEXT: movdqa %xmm2, %xmm0 +; X32-NEXT: psubd %xmm0, %xmm1 +; X32-NEXT: paddd %xmm2, %xmm1 +; X32-NEXT: psubd {{\.LCPI.*}}, %xmm1 +; X32-NEXT: movdqa %xmm1, %xmm0 ; X32-NEXT: retl ; ; X64-LABEL: vec_sink_add_of_const_to_sub2: ; X64: # %bb.0: -; X64-NEXT: psubd %xmm1, %xmm0 -; X64-NEXT: paddd {{.*}}(%rip), %xmm0 -; X64-NEXT: psubd %xmm0, %xmm2 -; X64-NEXT: movdqa %xmm2, %xmm0 +; X64-NEXT: 
psubd %xmm0, %xmm1 +; X64-NEXT: paddd %xmm2, %xmm1 +; X64-NEXT: psubd {{.*}}(%rip), %xmm1 +; X64-NEXT: movdqa %xmm1, %xmm0 ; X64-NEXT: retq %t0 = sub <4 x i32> %a, %b %t1 = add <4 x i32> %t0, ; constant always on RHS -- 2.40.0