return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
combineMulMulAddOrSub(9, 3, /*isAdd*/ true));
}
+
+ // Another trick. If this is a power of 2 plus 2/4/8, we can use a shift
+ // followed by a single LEA.
+ // First check if this is a sum of two powers of 2 because that's easy. Then
+ // count how many trailing zeros there are below the lowest set bit.
+ // TODO: We can do this even without LEA at a cost of two shifts and an add.
+ if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
+ unsigned ScaleShift = countTrailingZeros(MulAmt);
+ if (ScaleShift >= 1 && ScaleShift < 4) {
+ unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
+ SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(ShiftAmt, DL, MVT::i8));
+ SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(ScaleShift, DL, MVT::i8));
+ return DAG.getNode(ISD::ADD, DL, VT, Shift1, Shift2);
+ }
+ }
+
return SDValue();
}
define i16 @test_mul_by_66(i16 %x) {
; X86-LABEL: test_mul_by_66:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: shll $6, %eax
-; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $6, %ecx
+; X86-NEXT: leal (%ecx,%eax,2), %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: shll $6, %eax
-; X64-NEXT: leal (%rax,%rdi), %eax
-; X64-NEXT: addl %edi, %eax
+; X64-NEXT: leal (%rax,%rdi,2), %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%mul = mul nsw i16 %x, 66
ret i16 %mul
}
+; x * 520 == (x << 9) + (x << 3), so the checks below expect a shll by 9
+; feeding an LEA with scale 8 rather than a multiply instruction.
+define i16 @test_mul_by_520(i16 %x) {
+; X86-LABEL: test_mul_by_520:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $9, %ecx
+; X86-NEXT: leal (%ecx,%eax,8), %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mul_by_520:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: shll $9, %eax
+; X64-NEXT: leal (%rax,%rdi,8), %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+ %mul = mul nsw i16 %x, 520
+ ret i16 %mul
+}
+
; (x*9+42)*(x*5+2)
define i16 @test_mul_spec(i16 %x) nounwind {
; X86-LABEL: test_mul_spec:
define i32 @test_mul_by_66(i32 %x) {
; X86-LABEL: test_mul_by_66:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: shll $6, %eax
-; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $6, %ecx
+; X86-NEXT: leal (%ecx,%eax,2), %eax
; X86-NEXT: retl
;
; X64-HSW-LABEL: test_mul_by_66:
; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: shll $6, %eax # sched: [1:0.50]
-; X64-HSW-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: leal (%rax,%rdi,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [7:1.00]
;
; X64-JAG-LABEL: test_mul_by_66:
; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50]
; X64-JAG-NEXT: shll $6, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50]
-; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rax,%rdi,2), %eax # sched: [2:1.00]
; X64-JAG-NEXT: retq # sched: [4:1.00]
;
; X86-NOOPT-LABEL: test_mul_by_66:
ret i32 %mul
}
+; x * 520 == (x << 9) + (x << 3). The X86, X64-HSW and X64-JAG checks below
+; expect a shll by 9 feeding an LEA with scale 8; the NOOPT and SLM checks
+; still expect a single imull by 520.
+define i32 @test_mul_by_520(i32 %x) {
+; X86-LABEL: test_mul_by_520:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $9, %ecx
+; X86-NEXT: leal (%ecx,%eax,8), %eax
+; X86-NEXT: retl
+;
+; X64-HSW-LABEL: test_mul_by_520:
+; X64-HSW: # %bb.0:
+; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: shll $9, %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rax,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [7:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_520:
+; X64-JAG: # %bb.0:
+; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: shll $9, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rax,%rdi,8), %eax # sched: [2:1.00]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_520:
+; X86-NOOPT: # %bb.0:
+; X86-NOOPT-NEXT: imull $520, {{[0-9]+}}(%esp), %eax # imm = 0x208
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_520:
+; HSW-NOOPT: # %bb.0:
+; HSW-NOOPT-NEXT: imull $520, %edi, %eax # imm = 0x208
+; HSW-NOOPT-NEXT: # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_520:
+; JAG-NOOPT: # %bb.0:
+; JAG-NOOPT-NEXT: imull $520, %edi, %eax # imm = 0x208
+; JAG-NOOPT-NEXT: # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_520:
+; X64-SLM: # %bb.0:
+; X64-SLM-NEXT: imull $520, %edi, %eax # imm = 0x208
+; X64-SLM-NEXT: # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_520:
+; SLM-NOOPT: # %bb.0:
+; SLM-NOOPT-NEXT: imull $520, %edi, %eax # imm = 0x208
+; SLM-NOOPT-NEXT: # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+ %mul = mul nsw i32 %x, 520
+ ret i32 %mul
+}
+
; (x*9+42)*(x*5+2)
define i32 @test_mul_spec(i32 %x) nounwind {
; X86-LABEL: test_mul_spec:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: shll $6, %ecx
-; X86-NEXT: addl %eax, %ecx
-; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: leal (%ecx,%eax,2), %ecx
; X86-NEXT: movl $66, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: addl %ecx, %edx
; X64-HSW: # %bb.0:
; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: shlq $6, %rax # sched: [1:0.50]
-; X64-HSW-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: leaq (%rax,%rdi,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [7:1.00]
;
; X64-JAG-LABEL: test_mul_by_66:
; X64-JAG: # %bb.0:
; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50]
; X64-JAG-NEXT: shlq $6, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50]
-; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rdi,2), %rax # sched: [2:1.00]
; X64-JAG-NEXT: retq # sched: [4:1.00]
;
; X86-NOOPT-LABEL: test_mul_by_66:
ret i64 %mul
}
+; x * 520 == (x << 9) + (x << 3). The X64-HSW and X64-JAG checks below expect
+; a shlq by 9 feeding an LEA with scale 8. The 32-bit X86 checks show the same
+; shift and LEA pair combined with a mull by 520, and the NOOPT and SLM checks
+; still expect multiply instructions.
+define i64 @test_mul_by_520(i64 %x) {
+; X86-LABEL: test_mul_by_520:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $9, %ecx
+; X86-NEXT: leal (%ecx,%eax,8), %ecx
+; X86-NEXT: movl $520, %eax # imm = 0x208
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: retl
+;
+; X64-HSW-LABEL: test_mul_by_520:
+; X64-HSW: # %bb.0:
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: shlq $9, %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [7:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_520:
+; X64-JAG: # %bb.0:
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: shlq $9, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rdi,8), %rax # sched: [2:1.00]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_520:
+; X86-NOOPT: # %bb.0:
+; X86-NOOPT-NEXT: movl $520, %eax # imm = 0x208
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $520, {{[0-9]+}}(%esp), %ecx # imm = 0x208
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_520:
+; HSW-NOOPT: # %bb.0:
+; HSW-NOOPT-NEXT: imulq $520, %rdi, %rax # imm = 0x208
+; HSW-NOOPT-NEXT: # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_520:
+; JAG-NOOPT: # %bb.0:
+; JAG-NOOPT-NEXT: imulq $520, %rdi, %rax # imm = 0x208
+; JAG-NOOPT-NEXT: # sched: [6:4.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_520:
+; X64-SLM: # %bb.0:
+; X64-SLM-NEXT: imulq $520, %rdi, %rax # imm = 0x208
+; X64-SLM-NEXT: # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_520:
+; SLM-NOOPT: # %bb.0:
+; SLM-NOOPT-NEXT: imulq $520, %rdi, %rax # imm = 0x208
+; SLM-NOOPT-NEXT: # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+ %mul = mul nsw i64 %x, 520
+ ret i64 %mul
+}
+
; (x*9+42)*(x*5+2)
define i64 @test_mul_spec(i64 %x) nounwind {
; X86-LABEL: test_mul_spec: