case ISD::XOR:
if (tryShrinkShlLogicImm(Node))
return;
- break;
+
+ LLVM_FALLTHROUGH;
+ case ISD::ADD:
+ case ISD::SUB: {
+ // Try to avoid folding immediates with multiple uses for optsize.
+ // This code tries to select to register form directly to avoid going
+ // through the isel table which might fold the immediate. We can't change
+ // the patterns on the add/sub/and/or/xor with immediate patterns in the
+ // tablegen files to check immediate use count without making the patterns
+ // unavailable to the fast-isel table.
+ if (!OptForSize)
+ break;
+
+ // Only handle i8/i16/i32/i64.
+ if (NVT != MVT::i8 && NVT != MVT::i16 && NVT != MVT::i32 && NVT != MVT::i64)
+ break;
+
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
+ if (!Cst)
+ break;
+
+ int64_t Val = Cst->getSExtValue();
+
+ // Make sure it's an immediate that is considered foldable.
+ // FIXME: Handle unsigned 32 bit immediates for 64-bit AND.
+ if (!isInt<8>(Val) && !isInt<32>(Val))
+ break;
+
+ // Check if we should avoid folding this immediate.
+ if (!shouldAvoidImmediateInstFormsForSize(N1.getNode()))
+ break;
+
+ // We should not fold the immediate. So we need a register form instead.
+ unsigned ROpc, MOpc;
+ switch (NVT.SimpleTy) {
+ default: llvm_unreachable("Unexpected VT!");
+ case MVT::i8:
+ switch (Opcode) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case ISD::ADD: ROpc = X86::ADD8rr; MOpc = X86::ADD8rm; break;
+ case ISD::SUB: ROpc = X86::SUB8rr; MOpc = X86::SUB8rm; break;
+ case ISD::AND: ROpc = X86::AND8rr; MOpc = X86::AND8rm; break;
+ case ISD::OR: ROpc = X86::OR8rr; MOpc = X86::OR8rm; break;
+ case ISD::XOR: ROpc = X86::XOR8rr; MOpc = X86::XOR8rm; break;
+ }
+ break;
+ case MVT::i16:
+ switch (Opcode) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case ISD::ADD: ROpc = X86::ADD16rr; MOpc = X86::ADD16rm; break;
+ case ISD::SUB: ROpc = X86::SUB16rr; MOpc = X86::SUB16rm; break;
+ case ISD::AND: ROpc = X86::AND16rr; MOpc = X86::AND16rm; break;
+ case ISD::OR: ROpc = X86::OR16rr; MOpc = X86::OR16rm; break;
+ case ISD::XOR: ROpc = X86::XOR16rr; MOpc = X86::XOR16rm; break;
+ }
+ break;
+ case MVT::i32:
+ switch (Opcode) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case ISD::ADD: ROpc = X86::ADD32rr; MOpc = X86::ADD32rm; break;
+ case ISD::SUB: ROpc = X86::SUB32rr; MOpc = X86::SUB32rm; break;
+ case ISD::AND: ROpc = X86::AND32rr; MOpc = X86::AND32rm; break;
+ case ISD::OR: ROpc = X86::OR32rr; MOpc = X86::OR32rm; break;
+ case ISD::XOR: ROpc = X86::XOR32rr; MOpc = X86::XOR32rm; break;
+ }
+ break;
+ case MVT::i64:
+ switch (Opcode) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case ISD::ADD: ROpc = X86::ADD64rr; MOpc = X86::ADD64rm; break;
+ case ISD::SUB: ROpc = X86::SUB64rr; MOpc = X86::SUB64rm; break;
+ case ISD::AND: ROpc = X86::AND64rr; MOpc = X86::AND64rm; break;
+ case ISD::OR: ROpc = X86::OR64rr; MOpc = X86::OR64rm; break;
+ case ISD::XOR: ROpc = X86::XOR64rr; MOpc = X86::XOR64rm; break;
+ }
+ break;
+ }
+
+ // Ok this is an AND/OR/XOR/ADD/SUB with constant.
+
+ // If this is not a subtract, we can still try to fold a load.
+ if (Opcode != ISD::SUB) {
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+ SDValue Ops[] = { N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
+ SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
+ MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
+ // Update the chain.
+ ReplaceUses(N0.getValue(1), SDValue(CNode, 2));
+ // Record the mem-refs
+ CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N0)->getMemOperand()});
+ ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
+ }
+
+ CurDAG->SelectNodeTo(Node, ROpc, NVT, MVT::i32, N0, N1);
+ return;
+ }
case X86ISD::SMUL:
// i16/i32/i64 are handled with isel patterns.
; X32-NEXT: shrl %ecx
; X32-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X32-NEXT: subl %ecx, %eax
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: andl $858993459, %ecx # imm = 0x33333333
+; X32-NEXT: movl $858993459, %ecx # imm = 0x33333333
+; X32-NEXT: movl %eax, %edx
+; X32-NEXT: andl %ecx, %edx
; X32-NEXT: shrl $2, %eax
-; X32-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: andl %ecx, %eax
+; X32-NEXT: addl %edx, %eax
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $4, %ecx
; X32-NEXT: addl %eax, %ecx
; X64-NEXT: shrl %eax
; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555
; X64-NEXT: subl %eax, %edi
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andl $858993459, %eax # imm = 0x33333333
+; X64-NEXT: movl $858993459, %eax # imm = 0x33333333
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: andl %eax, %ecx
; X64-NEXT: shrl $2, %edi
-; X64-NEXT: andl $858993459, %edi # imm = 0x33333333
-; X64-NEXT: addl %eax, %edi
+; X64-NEXT: andl %eax, %edi
+; X64-NEXT: addl %ecx, %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: shrl $4, %eax
; X64-NEXT: addl %edi, %eax
define i64 @cnt64_optsize(i64 %x) nounwind readnone optsize {
; X32-NOSSE-LABEL: cnt64_optsize:
; X32-NOSSE: # %bb.0:
+; X32-NOSSE-NEXT: pushl %ebx
+; X32-NOSSE-NEXT: pushl %edi
+; X32-NOSSE-NEXT: pushl %esi
; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NOSSE-NEXT: movl %ecx, %edx
; X32-NOSSE-NEXT: shrl %edx
-; X32-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555
+; X32-NOSSE-NEXT: movl $1431655765, %esi # imm = 0x55555555
+; X32-NOSSE-NEXT: andl %esi, %edx
; X32-NOSSE-NEXT: subl %edx, %ecx
-; X32-NOSSE-NEXT: movl %ecx, %edx
-; X32-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
+; X32-NOSSE-NEXT: movl $858993459, %edx # imm = 0x33333333
+; X32-NOSSE-NEXT: movl %ecx, %edi
+; X32-NOSSE-NEXT: andl %edx, %edi
; X32-NOSSE-NEXT: shrl $2, %ecx
-; X32-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; X32-NOSSE-NEXT: addl %edx, %ecx
-; X32-NOSSE-NEXT: movl %ecx, %edx
-; X32-NOSSE-NEXT: shrl $4, %edx
-; X32-NOSSE-NEXT: addl %ecx, %edx
-; X32-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
-; X32-NOSSE-NEXT: imull $16843009, %edx, %ecx # imm = 0x1010101
-; X32-NOSSE-NEXT: shrl $24, %ecx
-; X32-NOSSE-NEXT: movl %eax, %edx
-; X32-NOSSE-NEXT: shrl %edx
-; X32-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555
-; X32-NOSSE-NEXT: subl %edx, %eax
-; X32-NOSSE-NEXT: movl %eax, %edx
-; X32-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
+; X32-NOSSE-NEXT: andl %edx, %ecx
+; X32-NOSSE-NEXT: addl %edi, %ecx
+; X32-NOSSE-NEXT: movl %ecx, %edi
+; X32-NOSSE-NEXT: shrl $4, %edi
+; X32-NOSSE-NEXT: addl %ecx, %edi
+; X32-NOSSE-NEXT: movl $252645135, %ecx # imm = 0xF0F0F0F
+; X32-NOSSE-NEXT: andl %ecx, %edi
+; X32-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101
+; X32-NOSSE-NEXT: shrl $24, %edi
+; X32-NOSSE-NEXT: movl %eax, %ebx
+; X32-NOSSE-NEXT: shrl %ebx
+; X32-NOSSE-NEXT: andl %esi, %ebx
+; X32-NOSSE-NEXT: subl %ebx, %eax
+; X32-NOSSE-NEXT: movl %eax, %esi
+; X32-NOSSE-NEXT: andl %edx, %esi
; X32-NOSSE-NEXT: shrl $2, %eax
-; X32-NOSSE-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X32-NOSSE-NEXT: addl %edx, %eax
+; X32-NOSSE-NEXT: andl %edx, %eax
+; X32-NOSSE-NEXT: addl %esi, %eax
; X32-NOSSE-NEXT: movl %eax, %edx
; X32-NOSSE-NEXT: shrl $4, %edx
; X32-NOSSE-NEXT: addl %eax, %edx
-; X32-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
+; X32-NOSSE-NEXT: andl %ecx, %edx
; X32-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101
; X32-NOSSE-NEXT: shrl $24, %eax
-; X32-NOSSE-NEXT: addl %ecx, %eax
+; X32-NOSSE-NEXT: addl %edi, %eax
; X32-NOSSE-NEXT: xorl %edx, %edx
+; X32-NOSSE-NEXT: popl %esi
+; X32-NOSSE-NEXT: popl %edi
+; X32-NOSSE-NEXT: popl %ebx
; X32-NOSSE-NEXT: retl
;
; X64-LABEL: cnt64_optsize:
define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
; X32-NOSSE-LABEL: cnt128_optsize:
; X32-NOSSE: # %bb.0:
+; X32-NOSSE-NEXT: pushl %ebp
; X32-NOSSE-NEXT: pushl %ebx
; X32-NOSSE-NEXT: pushl %edi
; X32-NOSSE-NEXT: pushl %esi
-; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-NOSSE-NEXT: movl %edi, %ebx
+; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X32-NOSSE-NEXT: movl %ebx, %ecx
+; X32-NOSSE-NEXT: shrl %ecx
+; X32-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555
+; X32-NOSSE-NEXT: andl %edi, %ecx
+; X32-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555
+; X32-NOSSE-NEXT: subl %ecx, %ebx
+; X32-NOSSE-NEXT: movl $858993459, %ecx # imm = 0x33333333
+; X32-NOSSE-NEXT: movl %ebx, %ebp
+; X32-NOSSE-NEXT: andl %ecx, %ebp
+; X32-NOSSE-NEXT: shrl $2, %ebx
+; X32-NOSSE-NEXT: andl %ecx, %ebx
+; X32-NOSSE-NEXT: addl %ebp, %ebx
+; X32-NOSSE-NEXT: movl %ebx, %ebp
+; X32-NOSSE-NEXT: shrl $4, %ebp
+; X32-NOSSE-NEXT: addl %ebx, %ebp
+; X32-NOSSE-NEXT: movl %eax, %ebx
; X32-NOSSE-NEXT: shrl %ebx
-; X32-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555
-; X32-NOSSE-NEXT: subl %ebx, %edi
-; X32-NOSSE-NEXT: movl %edi, %ebx
-; X32-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333
-; X32-NOSSE-NEXT: shrl $2, %edi
-; X32-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
-; X32-NOSSE-NEXT: addl %ebx, %edi
-; X32-NOSSE-NEXT: movl %edi, %ebx
-; X32-NOSSE-NEXT: shrl $4, %ebx
-; X32-NOSSE-NEXT: addl %edi, %ebx
-; X32-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F
-; X32-NOSSE-NEXT: imull $16843009, %ebx, %edi # imm = 0x1010101
+; X32-NOSSE-NEXT: andl %edi, %ebx
+; X32-NOSSE-NEXT: subl %ebx, %eax
+; X32-NOSSE-NEXT: movl %eax, %ebx
+; X32-NOSSE-NEXT: andl %ecx, %ebx
+; X32-NOSSE-NEXT: shrl $2, %eax
+; X32-NOSSE-NEXT: andl %ecx, %eax
+; X32-NOSSE-NEXT: addl %ebx, %eax
+; X32-NOSSE-NEXT: movl %eax, %edi
+; X32-NOSSE-NEXT: shrl $4, %edi
+; X32-NOSSE-NEXT: addl %eax, %edi
+; X32-NOSSE-NEXT: movl $252645135, %ebx # imm = 0xF0F0F0F
+; X32-NOSSE-NEXT: andl %ebx, %ebp
+; X32-NOSSE-NEXT: imull $16843009, %ebp, %eax # imm = 0x1010101
+; X32-NOSSE-NEXT: shrl $24, %eax
+; X32-NOSSE-NEXT: andl %ebx, %edi
+; X32-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101
; X32-NOSSE-NEXT: shrl $24, %edi
-; X32-NOSSE-NEXT: movl %esi, %ebx
-; X32-NOSSE-NEXT: shrl %ebx
-; X32-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555
-; X32-NOSSE-NEXT: subl %ebx, %esi
-; X32-NOSSE-NEXT: movl %esi, %ebx
-; X32-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333
+; X32-NOSSE-NEXT: addl %eax, %edi
+; X32-NOSSE-NEXT: movl %esi, %eax
+; X32-NOSSE-NEXT: shrl %eax
+; X32-NOSSE-NEXT: movl $1431655765, %ebp # imm = 0x55555555
+; X32-NOSSE-NEXT: andl %ebp, %eax
+; X32-NOSSE-NEXT: subl %eax, %esi
+; X32-NOSSE-NEXT: movl %esi, %eax
+; X32-NOSSE-NEXT: andl %ecx, %eax
; X32-NOSSE-NEXT: shrl $2, %esi
-; X32-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333
-; X32-NOSSE-NEXT: addl %ebx, %esi
-; X32-NOSSE-NEXT: movl %esi, %ebx
-; X32-NOSSE-NEXT: shrl $4, %ebx
-; X32-NOSSE-NEXT: addl %esi, %ebx
-; X32-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F
-; X32-NOSSE-NEXT: imull $16843009, %ebx, %esi # imm = 0x1010101
-; X32-NOSSE-NEXT: shrl $24, %esi
-; X32-NOSSE-NEXT: addl %edi, %esi
-; X32-NOSSE-NEXT: movl %edx, %edi
-; X32-NOSSE-NEXT: shrl %edi
-; X32-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555
-; X32-NOSSE-NEXT: subl %edi, %edx
-; X32-NOSSE-NEXT: movl %edx, %edi
-; X32-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
+; X32-NOSSE-NEXT: andl %ecx, %esi
+; X32-NOSSE-NEXT: addl %eax, %esi
+; X32-NOSSE-NEXT: movl %esi, %eax
+; X32-NOSSE-NEXT: shrl $4, %eax
+; X32-NOSSE-NEXT: addl %esi, %eax
+; X32-NOSSE-NEXT: movl %edx, %esi
+; X32-NOSSE-NEXT: shrl %esi
+; X32-NOSSE-NEXT: andl %ebp, %esi
+; X32-NOSSE-NEXT: subl %esi, %edx
+; X32-NOSSE-NEXT: movl %edx, %esi
+; X32-NOSSE-NEXT: andl %ecx, %esi
; X32-NOSSE-NEXT: shrl $2, %edx
-; X32-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
-; X32-NOSSE-NEXT: addl %edi, %edx
-; X32-NOSSE-NEXT: movl %edx, %edi
-; X32-NOSSE-NEXT: shrl $4, %edi
-; X32-NOSSE-NEXT: addl %edx, %edi
-; X32-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
-; X32-NOSSE-NEXT: imull $16843009, %edi, %edx # imm = 0x1010101
-; X32-NOSSE-NEXT: shrl $24, %edx
-; X32-NOSSE-NEXT: movl %ecx, %edi
-; X32-NOSSE-NEXT: shrl %edi
-; X32-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555
-; X32-NOSSE-NEXT: subl %edi, %ecx
-; X32-NOSSE-NEXT: movl %ecx, %edi
-; X32-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
-; X32-NOSSE-NEXT: shrl $2, %ecx
-; X32-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; X32-NOSSE-NEXT: addl %edi, %ecx
-; X32-NOSSE-NEXT: movl %ecx, %edi
-; X32-NOSSE-NEXT: shrl $4, %edi
-; X32-NOSSE-NEXT: addl %ecx, %edi
-; X32-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
-; X32-NOSSE-NEXT: imull $16843009, %edi, %ecx # imm = 0x1010101
-; X32-NOSSE-NEXT: shrl $24, %ecx
+; X32-NOSSE-NEXT: andl %ecx, %edx
+; X32-NOSSE-NEXT: addl %esi, %edx
+; X32-NOSSE-NEXT: movl %edx, %ecx
+; X32-NOSSE-NEXT: shrl $4, %ecx
; X32-NOSSE-NEXT: addl %edx, %ecx
-; X32-NOSSE-NEXT: addl %esi, %ecx
+; X32-NOSSE-NEXT: andl %ebx, %eax
+; X32-NOSSE-NEXT: andl %ebx, %ecx
+; X32-NOSSE-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
+; X32-NOSSE-NEXT: shrl $24, %eax
+; X32-NOSSE-NEXT: imull $16843009, %ecx, %ecx # imm = 0x1010101
+; X32-NOSSE-NEXT: shrl $24, %ecx
+; X32-NOSSE-NEXT: addl %eax, %ecx
+; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NOSSE-NEXT: addl %edi, %ecx
; X32-NOSSE-NEXT: xorl %edx, %edx
; X32-NOSSE-NEXT: movl %edx, 12(%eax)
; X32-NOSSE-NEXT: movl %edx, 8(%eax)
; X32-NOSSE-NEXT: popl %esi
; X32-NOSSE-NEXT: popl %edi
; X32-NOSSE-NEXT: popl %ebx
+; X32-NOSSE-NEXT: popl %ebp
; X32-NOSSE-NEXT: retl $4
;
; X64-LABEL: cnt128_optsize: