From: Craig Topper Date: Wed, 25 Jul 2018 01:15:38 +0000 (+0000) Subject: [X86] Use a shift plus an lea for multiplying by a constant that is a power of 2... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0bca0d9f0651efb85988da69d34431d0447af5f1;p=llvm [X86] Use a shift plus an lea for multiplying by a constant that is a power of 2 plus 2/4/8. The LEA allows us to combine an add and the multiply by 2/4/8 together so we just need a shift for the larger power of 2. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@337875 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index fc29069ffdd..c7deffd5619 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -33749,6 +33749,24 @@ static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), combineMulMulAddOrSub(9, 3, /*isAdd*/ true)); } + + // Another trick. If this is a power of 2 + 2/4/8, we can use a shift followed + // by a single LEA. + // First check if this is a sum of two powers of 2 because that's easy. Then + // count how many zeros are up to the first bit. + // TODO: We can do this even without LEA at a cost of two shifts and an add. 
+ if (isPowerOf2_64(MulAmt & (MulAmt - 1))) { + unsigned ScaleShift = countTrailingZeros(MulAmt); + if (ScaleShift >= 1 && ScaleShift < 4) { + unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1))); + SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(ShiftAmt, DL, MVT::i8)); + SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(ScaleShift, DL, MVT::i8)); + return DAG.getNode(ISD::ADD, DL, VT, Shift1, Shift2); + } + } + return SDValue(); } diff --git a/test/CodeGen/X86/mul-constant-i16.ll b/test/CodeGen/X86/mul-constant-i16.ll index 305867c8ad8..737bcc7c864 100644 --- a/test/CodeGen/X86/mul-constant-i16.ll +++ b/test/CodeGen/X86/mul-constant-i16.ll @@ -705,11 +705,10 @@ define i16 @test_mul_by_62(i16 %x) { define i16 @test_mul_by_66(i16 %x) { ; X86-LABEL: test_mul_by_66: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: shll $6, %eax -; X86-NEXT: addl %ecx, %eax -; X86-NEXT: addl %ecx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: shll $6, %ecx +; X86-NEXT: leal (%ecx,%eax,2), %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; @@ -718,8 +717,7 @@ define i16 @test_mul_by_66(i16 %x) { ; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: movl %edi, %eax ; X64-NEXT: shll $6, %eax -; X64-NEXT: leal (%rax,%rdi), %eax -; X64-NEXT: addl %edi, %eax +; X64-NEXT: leal (%rax,%rdi,2), %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %mul = mul nsw i16 %x, 66 @@ -746,6 +744,28 @@ define i16 @test_mul_by_73(i16 %x) { ret i16 %mul } +define i16 @test_mul_by_520(i16 %x) { +; X86-LABEL: test_mul_by_520: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: shll $9, %ecx +; X86-NEXT: leal (%ecx,%eax,8), %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: test_mul_by_520: +; X64: # %bb.0: +; 
X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: movl %edi, %eax +; X64-NEXT: shll $9, %eax +; X64-NEXT: leal (%rax,%rdi,8), %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq + %mul = mul nsw i16 %x, 520 + ret i16 %mul +} + ; (x*9+42)*(x*5+2) define i16 @test_mul_spec(i16 %x) nounwind { ; X86-LABEL: test_mul_spec: diff --git a/test/CodeGen/X86/mul-constant-i32.ll b/test/CodeGen/X86/mul-constant-i32.ll index 658a6538bbd..79b13a208cb 100644 --- a/test/CodeGen/X86/mul-constant-i32.ll +++ b/test/CodeGen/X86/mul-constant-i32.ll @@ -1836,11 +1836,10 @@ define i32 @test_mul_by_62(i32 %x) { define i32 @test_mul_by_66(i32 %x) { ; X86-LABEL: test_mul_by_66: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: shll $6, %eax -; X86-NEXT: addl %ecx, %eax -; X86-NEXT: addl %ecx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: shll $6, %ecx +; X86-NEXT: leal (%ecx,%eax,2), %eax ; X86-NEXT: retl ; ; X64-HSW-LABEL: test_mul_by_66: @@ -1848,8 +1847,7 @@ define i32 @test_mul_by_66(i32 %x) { ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi ; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] ; X64-HSW-NEXT: shll $6, %eax # sched: [1:0.50] -; X64-HSW-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50] -; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: leal (%rax,%rdi,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [7:1.00] ; ; X64-JAG-LABEL: test_mul_by_66: @@ -1857,8 +1855,7 @@ define i32 @test_mul_by_66(i32 %x) { ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi ; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50] ; X64-JAG-NEXT: shll $6, %eax # sched: [1:0.50] -; X64-JAG-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50] -; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rax,%rdi,2), %eax # sched: [2:1.00] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_66: @@ -1943,6 +1940,63 @@ define i32 
@test_mul_by_73(i32 %x) { ret i32 %mul } +define i32 @test_mul_by_520(i32 %x) { +; X86-LABEL: test_mul_by_520: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: shll $9, %ecx +; X86-NEXT: leal (%ecx,%eax,8), %eax +; X86-NEXT: retl +; +; X64-HSW-LABEL: test_mul_by_520: +; X64-HSW: # %bb.0: +; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi +; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: shll $9, %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rax,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [7:1.00] +; +; X64-JAG-LABEL: test_mul_by_520: +; X64-JAG: # %bb.0: +; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi +; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: shll $9, %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rax,%rdi,8), %eax # sched: [2:1.00] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_520: +; X86-NOOPT: # %bb.0: +; X86-NOOPT-NEXT: imull $520, {{[0-9]+}}(%esp), %eax # imm = 0x208 +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_520: +; HSW-NOOPT: # %bb.0: +; HSW-NOOPT-NEXT: imull $520, %edi, %eax # imm = 0x208 +; HSW-NOOPT-NEXT: # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_520: +; JAG-NOOPT: # %bb.0: +; JAG-NOOPT-NEXT: imull $520, %edi, %eax # imm = 0x208 +; JAG-NOOPT-NEXT: # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_520: +; X64-SLM: # %bb.0: +; X64-SLM-NEXT: imull $520, %edi, %eax # imm = 0x208 +; X64-SLM-NEXT: # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_520: +; SLM-NOOPT: # %bb.0: +; SLM-NOOPT-NEXT: imull $520, %edi, %eax # imm = 0x208 +; SLM-NOOPT-NEXT: # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] + %mul = mul nsw i32 %x, 520 + ret i32 %mul +} + ; (x*9+42)*(x*5+2) define i32 @test_mul_spec(i32 %x) nounwind { ; X86-LABEL: test_mul_spec: diff --git 
a/test/CodeGen/X86/mul-constant-i64.ll b/test/CodeGen/X86/mul-constant-i64.ll index e0793380ab9..332ad7f0129 100644 --- a/test/CodeGen/X86/mul-constant-i64.ll +++ b/test/CodeGen/X86/mul-constant-i64.ll @@ -1938,8 +1938,7 @@ define i64 @test_mul_by_66(i64 %x) { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: shll $6, %ecx -; X86-NEXT: addl %eax, %ecx -; X86-NEXT: addl %eax, %ecx +; X86-NEXT: leal (%ecx,%eax,2), %ecx ; X86-NEXT: movl $66, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: addl %ecx, %edx @@ -1949,16 +1948,14 @@ define i64 @test_mul_by_66(i64 %x) { ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: shlq $6, %rax # sched: [1:0.50] -; X64-HSW-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50] -; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: leaq (%rax,%rdi,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [7:1.00] ; ; X64-JAG-LABEL: test_mul_by_66: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50] ; X64-JAG-NEXT: shlq $6, %rax # sched: [1:0.50] -; X64-JAG-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50] -; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rax,%rdi,2), %rax # sched: [2:1.00] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_66: @@ -2049,6 +2046,67 @@ define i64 @test_mul_by_73(i64 %x) { ret i64 %mul } +define i64 @test_mul_by_520(i64 %x) { +; X86-LABEL: test_mul_by_520: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: shll $9, %ecx +; X86-NEXT: leal (%ecx,%eax,8), %ecx +; X86-NEXT: movl $520, %eax # imm = 0x208 +; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: addl %ecx, %edx +; X86-NEXT: retl +; +; X64-HSW-LABEL: test_mul_by_520: +; X64-HSW: # %bb.0: +; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: shlq $9, %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rax,%rdi,8), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: 
[7:1.00] +; +; X64-JAG-LABEL: test_mul_by_520: +; X64-JAG: # %bb.0: +; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: shlq $9, %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rax,%rdi,8), %rax # sched: [2:1.00] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_520: +; X86-NOOPT: # %bb.0: +; X86-NOOPT-NEXT: movl $520, %eax # imm = 0x208 +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $520, {{[0-9]+}}(%esp), %ecx # imm = 0x208 +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_520: +; HSW-NOOPT: # %bb.0: +; HSW-NOOPT-NEXT: imulq $520, %rdi, %rax # imm = 0x208 +; HSW-NOOPT-NEXT: # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_520: +; JAG-NOOPT: # %bb.0: +; JAG-NOOPT-NEXT: imulq $520, %rdi, %rax # imm = 0x208 +; JAG-NOOPT-NEXT: # sched: [6:4.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_520: +; X64-SLM: # %bb.0: +; X64-SLM-NEXT: imulq $520, %rdi, %rax # imm = 0x208 +; X64-SLM-NEXT: # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_520: +; SLM-NOOPT: # %bb.0: +; SLM-NOOPT-NEXT: imulq $520, %rdi, %rax # imm = 0x208 +; SLM-NOOPT-NEXT: # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] + %mul = mul nsw i64 %x, 520 + ret i64 %mul +} + ; (x*9+42)*(x*5+2) define i64 @test_mul_spec(i64 %x) nounwind { ; X86-LABEL: test_mul_spec: