From bd6cf0e2e7adcdbbc77c168291c624c57d945482 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 24 Jul 2018 20:31:48 +0000 Subject: [PATCH] [X86] Change multiply by 19 to use (9 * X) * 2 + X instead of (5 * X) * 4 - X. The new lowering can be done in 2 LEAs. The old code took 1 LEA, 1 shift, and 1 sub. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@337851 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 4 ++-- test/CodeGen/X86/mul-constant-i16.ll | 12 +++++------- test/CodeGen/X86/mul-constant-i32.ll | 17 +++++++---------- test/CodeGen/X86/mul-constant-i64.ll | 15 ++++++--------- test/CodeGen/X86/mul-constant-result.ll | 16 ++++++++++------ 5 files changed, 30 insertions(+), 34 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ed34aeaccd8..6f7636c619d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -33721,8 +33721,8 @@ static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), combineMulShlAddOrSub(5, 2, /*isAdd*/ true)); case 19: - // mul x, 19 => sub ((shl (mul x, 5), 2), x) - return combineMulShlAddOrSub(5, 2, /*isAdd*/ false); + // mul x, 19 => add ((shl (mul x, 9), 1), x) + return combineMulShlAddOrSub(9, 1, /*isAdd*/ true); case 13: // mul x, 13 => add ((shl (mul x, 3), 2), x) return combineMulShlAddOrSub(3, 2, /*isAdd*/ true); diff --git a/test/CodeGen/X86/mul-constant-i16.ll b/test/CodeGen/X86/mul-constant-i16.ll index a90814cc2ce..fc2256081eb 100644 --- a/test/CodeGen/X86/mul-constant-i16.ll +++ b/test/CodeGen/X86/mul-constant-i16.ll @@ -348,19 +348,17 @@ define i16 @test_mul_by_18(i16 %x) { define i16 @test_mul_by_19(i16 %x) { ; X86-LABEL: test_mul_by_19: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: leal (%ecx,%ecx,4), %eax -; X86-NEXT: shll $2, %eax -; X86-NEXT: subl %ecx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), 
%eax +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%eax,%ecx,2), %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_19: ; X64: # %bb.0: ; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: leal (%rdi,%rdi,4), %eax -; X64-NEXT: shll $2, %eax -; X64-NEXT: subl %edi, %eax +; X64-NEXT: leal (%rdi,%rdi,8), %eax +; X64-NEXT: leal (%rdi,%rax,2), %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %mul = mul nsw i16 %x, 19 diff --git a/test/CodeGen/X86/mul-constant-i32.ll b/test/CodeGen/X86/mul-constant-i32.ll index d80e69dd61b..2cd20db1b65 100644 --- a/test/CodeGen/X86/mul-constant-i32.ll +++ b/test/CodeGen/X86/mul-constant-i32.ll @@ -937,26 +937,23 @@ define i32 @test_mul_by_18(i32 %x) { define i32 @test_mul_by_19(i32 %x) { ; X86-LABEL: test_mul_by_19: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: leal (%ecx,%ecx,4), %eax -; X86-NEXT: shll $2, %eax -; X86-NEXT: subl %ecx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%eax,%ecx,2), %eax ; X86-NEXT: retl ; ; X64-HSW-LABEL: test_mul_by_19: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: shll $2, %eax # sched: [1:0.50] -; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rax,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [7:1.00] ; ; X64-JAG-LABEL: test_mul_by_19: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00] -; X64-JAG-NEXT: shll $2, %eax # sched: [1:0.50] -; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [2:1.00] +; X64-JAG-NEXT: leal (%rdi,%rax,2), %eax # sched: [2:1.00] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; 
X86-NOOPT-LABEL: test_mul_by_19: diff --git a/test/CodeGen/X86/mul-constant-i64.ll b/test/CodeGen/X86/mul-constant-i64.ll index cdbda9133ee..d7d962c785a 100644 --- a/test/CodeGen/X86/mul-constant-i64.ll +++ b/test/CodeGen/X86/mul-constant-i64.ll @@ -967,9 +967,8 @@ define i64 @test_mul_by_19(i64 %x) { ; X86-LABEL: test_mul_by_19: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: leal (%eax,%eax,4), %ecx -; X86-NEXT: shll $2, %ecx -; X86-NEXT: subl %eax, %ecx +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%eax,%ecx,2), %ecx ; X86-NEXT: movl $19, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: addl %ecx, %edx @@ -977,16 +976,14 @@ define i64 @test_mul_by_19(i64 %x) { ; ; X64-HSW-LABEL: test_mul_by_19: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: shlq $2, %rax # sched: [1:0.50] -; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rax,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [7:1.00] ; ; X64-JAG-LABEL: test_mul_by_19: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00] -; X64-JAG-NEXT: shlq $2, %rax # sched: [1:0.50] -; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [2:1.00] +; X64-JAG-NEXT: leaq (%rdi,%rax,2), %rax # sched: [2:1.00] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_19: diff --git a/test/CodeGen/X86/mul-constant-result.ll b/test/CodeGen/X86/mul-constant-result.ll index 70acb9ee7c7..0148e777155 100644 --- a/test/CodeGen/X86/mul-constant-result.ll +++ b/test/CodeGen/X86/mul-constant-result.ll @@ -144,10 +144,13 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 { ; X86-NEXT: retl ; X86-NEXT: .LBB0_25: ; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: leal (%eax,%eax,4), %ecx -; X86-NEXT: shll $2, %ecx -; X86-NEXT: jmp .LBB0_12 +; X86-NEXT: leal (%eax,%eax,8), %ecx +; 
X86-NEXT: leal (%eax,%ecx,2), %eax +; X86-NEXT: popl %esi +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl ; X86-NEXT: .LBB0_26: +; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: shll $2, %eax ; X86-NEXT: leal (%eax,%eax,4), %eax ; X86-NEXT: popl %esi @@ -330,9 +333,10 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 { ; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax ; X64-HSW-NEXT: retq ; X64-HSW-NEXT: .LBB0_22: -; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx -; X64-HSW-NEXT: shll $2, %ecx -; X64-HSW-NEXT: jmp .LBB0_8 +; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx +; X64-HSW-NEXT: leal (%rax,%rcx,2), %eax +; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax +; X64-HSW-NEXT: retq ; X64-HSW-NEXT: .LBB0_23: ; X64-HSW-NEXT: shll $2, %eax ; X64-HSW-NEXT: leal (%rax,%rax,4), %eax -- 2.50.1