      Complexity += 2;
  }
+  // Heuristic: try harder to form an LEA from ADD if the operands set flags.
+  // Unlike ADD, LEA does not affect flags, so we will be less likely to require
+  // duplicating flag-producing instructions later in the pipeline.
+  if (N.getOpcode() == ISD::ADD) {
+    auto isMathWithFlags = [](SDValue V) {
+      switch (V.getOpcode()) {
+      case X86ISD::ADD:
+      case X86ISD::SUB:
+      case X86ISD::ADC:
+      case X86ISD::SBB:
+      /* TODO: These opcodes can be added safely, but we may want to justify
+               their inclusion for different reasons (better for reg-alloc).
+      case X86ISD::SMUL:
+      case X86ISD::UMUL:
+      case X86ISD::OR:
+      case X86ISD::XOR:
+      case X86ISD::AND:
+      */
+        // Value 1 is the flag output of the node - verify it's not dead.
+        return !SDValue(V.getNode(), 1).use_empty();
+      default:
+        return false;
+      }
+    };
+    // TODO: This could be an 'or' rather than 'and' to make the transform more
+    //       likely to happen. We might want to factor in whether there's a
+    //       load folding opportunity for the math op that disappears with LEA.
+    if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1)))
+      Complexity++;
+  }
+
  if (AM.Disp)
    Complexity++;
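
For intuition, here is a rough source-level analogue of the pattern this heuristic targets (a hypothetical sketch, not part of the patch): the subtract produces the borrow flag that the select still needs, and its result also feeds an x + x add. If that add is emitted as LEA, EFLAGS survives and the subtract does not have to be re-issued just to regenerate the flag. Whether a front end reaches this exact DAG depends on optimization; the regression test below pins the pattern down directly with the llvm.x86.subborrow.32 intrinsic.

#include <cstdint>
#include <cstdio>

// Hypothetical analogue of the tested pattern: the flag-producing subtract
// feeds an add of the form x + x, and the borrow is still consumed afterwards.
static std::uint32_t doubled_diff_if_borrow(std::uint32_t a, std::uint32_t b) {
  std::uint32_t d = a - b;      // flag-producing subtract
  std::uint32_t twice = d + d;  // add that the heuristic steers toward LEA
  return a < b ? twice : 0;     // consumes the borrow via cmov/branch
}

int main() {
  std::printf("%u\n", doubled_diff_if_borrow(3, 5)); // prints 4294967292
  return 0;
}
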
define i32 @PR40483_sub6(i32*, i32) nounwind {
; X86-LABEL: PR40483_sub6:
; X86: # %bb.0:
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl (%edx), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %esi, %ecx
-; X86-NEXT: subl %edi, %ecx
+; X86-NEXT: movl (%edx), %ecx
; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: subl %edi, %esi
-; X86-NEXT: movl %esi, (%edx)
+; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, (%edx)
; X86-NEXT: jae .LBB8_2
; X86-NEXT: # %bb.1:
-; X86-NEXT: addl %ecx, %ecx
-; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: leal (%ecx,%ecx), %eax
; X86-NEXT: .LBB8_2:
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: PR40483_sub6:
; X64: # %bb.0:
-; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: movl %ecx, %edx
-; X64-NEXT: subl %esi, %edx
-; X64-NEXT: addl %edx, %edx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: subl %esi, %ecx
-; X64-NEXT: movl %ecx, (%rdi)
-; X64-NEXT: cmovbl %edx, %eax
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: xorl %ecx, %ecx
+; X64-NEXT: subl %esi, %eax
+; X64-NEXT: movl %eax, (%rdi)
+; X64-NEXT: leal (%rax,%rax), %eax
+; X64-NEXT: cmovael %ecx, %eax
; X64-NEXT: retq
%3 = load i32, i32* %0, align 8
%4 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1)