return true;
}
-/// Get the appropriate X86 opcode for an in-memory arithmetic operation that
-/// also sets flags.
-///
-/// FIXME: This is essentially re-implemneting a subset of the patterns for
-/// these instructions. Instead, we should compute this from the patterns
-/// somehow.
-///
-/// FIXME: Currently we only support integer operations.
-///
-/// If there is no X86 opcode, returns none.
-static Optional<unsigned> getFusedLdStWithFlagsOpcode(EVT LdVT, unsigned Opc) {
- auto SelectSize = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
- unsigned Opc8) -> Optional<unsigned> {
- switch (LdVT.getSimpleVT().SimpleTy) {
- case MVT::i64:
- return Opc64;
- case MVT::i32:
- return Opc32;
- case MVT::i16:
- return Opc16;
- case MVT::i8:
- return Opc8;
- default:
- return None;
- }
- };
- switch (Opc) {
- default:
- return None;
- case X86ISD::DEC:
- return SelectSize(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
- case X86ISD::INC:
- return SelectSize(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m);
- }
-}
-
/// Check whether or not the chain ending in StoreNode is suitable for doing
/// the {load; op; store} to modify transformation.
static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
return true;
}
-// Change a chain of {load; incr or dec; store} of the same value into
-// a simple increment or decrement through memory of that value, if the
-// uses of the modified value and its address are suitable.
-// The DEC64m tablegen pattern is currently not able to match the case where
-// the EFLAGS on the original DEC are used. (This also applies to
-// {INC,DEC}X{64,32,16,8}.)
-// We'll need to improve tablegen to allow flags to be transferred from a
-// node in the pattern to the result node. probably with a new keyword
-// for example, we have this
+// Change a chain of {load; op; store} of the same value into a simple op
+// through memory of that value, if the uses of the modified value and its
+// address are suitable.
+//
+// The tablegen memory operand patterns are currently not able to match the
+// case where the EFLAGS on the original operation are used.
+//
+// To move this to tablegen, we'll need to improve tablegen to allow flags to
+// be transferred from a node in the pattern to the result node, probably with
+// a new keyword. For example, we have this
// def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
// [(store (add (loadi64 addr:$dst), -1), addr:$dst),
// (implicit EFLAGS)]>;
// but maybe need something like this
// def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
// [(store (add (loadi64 addr:$dst), -1), addr:$dst),
// (transferrable EFLAGS)]>;
//
-// FIXME: This should handle a wide range of operations which support RMW
-// memory operands, not just inc and dec.
+// Until then, we manually fold these and instruction select the operation
+// here.
bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
SDValue StoredVal = StoreNode->getOperand(1);
unsigned Opc = StoredVal->getOpcode();
+ // Before we try to select anything, make sure this is a memory operand size
+ // and opcode we can handle. Note that this must match the code below that
+ // actually lowers the opcodes.
EVT MemVT = StoreNode->getMemoryVT();
- if (!MemVT.isSimple())
+ if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
+ MemVT != MVT::i8)
return false;
- Optional<unsigned> NewOpc = getFusedLdStWithFlagsOpcode(MemVT, Opc);
- if (!NewOpc)
+ switch (Opc) {
+ default:
return false;
+ case X86ISD::INC:
+ case X86ISD::DEC:
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ break;
+ }
LoadSDNode *LoadNode = nullptr;
SDValue InputChain;
Segment))
return false;
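+ // Helper to pick the opcode variant for the memory VT; the guard above
+ // ensures MemVT is one of the four integer sizes handled here.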
+ auto SelectOpcodeForSize = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
+ unsigned Opc8) {
+ switch (MemVT.getSimpleVT().SimpleTy) {
+ case MVT::i64:
+ return Opc64;
+ case MVT::i32:
+ return Opc32;
+ case MVT::i16:
+ return Opc16;
+ case MVT::i8:
+ return Opc8;
+ default:
+ llvm_unreachable("Invalid size!");
+ }
+ };
+
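+ // Build the RMW machine node. Each of these instructions produces EFLAGS
+ // (modeled as an i32 result) and a chain; the arithmetic result itself is
+ // only written to memory.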
+ MachineSDNode *Result;
+ switch (Opc) {
+ case X86ISD::INC:
+ case X86ISD::DEC: {
+ unsigned NewOpc = Opc == X86ISD::INC
+ ? SelectOpcodeForSize(X86::INC64m, X86::INC32m,
+ X86::INC16m, X86::INC8m)
+ : SelectOpcodeForSize(X86::DEC64m, X86::DEC32m,
+ X86::DEC16m, X86::DEC8m);
+ const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
+ Result =
+ CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
+ break;
+ }
+ case X86ISD::ADD:
+ case X86ISD::SUB: {
+ unsigned NewOpc = Opc == X86ISD::ADD
+ ? SelectOpcodeForSize(X86::ADD64mr, X86::ADD32mr,
+ X86::ADD16mr, X86::ADD8mr)
+ : SelectOpcodeForSize(X86::SUB64mr, X86::SUB32mr,
+ X86::SUB16mr, X86::SUB8mr);
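+ // Unlike INC/DEC, ADD/SUB take the RHS of the original arithmetic as an
+ // explicit register source operand.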
+ const SDValue Ops[] = {Base, Scale, Index,
+ Disp, Segment, StoredVal->getOperand(1),
+ InputChain};
+ Result =
+ CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
+ break;
+ }
+ default:
+ llvm_unreachable("Invalid opcode!");
+ }
+
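+ // The fused instruction both loads and stores, so transfer the memory
+ // operands of both the original load and the original store.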
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
MemOp[0] = StoreNode->getMemOperand();
MemOp[1] = LoadNode->getMemOperand();
- const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
- MachineSDNode *Result =
- CurDAG->getMachineNode(*NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
Result->setMemRefs(MemOp, MemOp + 2);
ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs | FileCheck %s
+
+target triple = "x86_64-unknown-unknown"
+
+@g64 = external global i64, align 8
+@g32 = external global i32, align 4
+@g16 = external global i16, align 2
+@g8 = external global i8, align 1
+
+declare void @a()
+declare void @b()
+
+define void @add64_imm_br() nounwind {
+; CHECK-LABEL: add64_imm_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movl $42, %eax
+; CHECK-NEXT: addq %rax, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB0_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB0_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i64, i64* @g64
+ %add = add nsw i64 %load1, 42
+ store i64 %add, i64* @g64
+ %cond = icmp slt i64 %add, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @add32_imm_br() nounwind {
+; CHECK-LABEL: add32_imm_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movl $42, %eax
+; CHECK-NEXT: addl %eax, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB1_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB1_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i32, i32* @g32
+ %add = add nsw i32 %load1, 42
+ store i32 %add, i32* @g32
+ %cond = icmp slt i32 %add, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @add16_imm_br() nounwind {
+; CHECK-LABEL: add16_imm_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movw $42, %ax
+; CHECK-NEXT: addw %ax, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB2_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB2_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i16, i16* @g16
+ %add = add nsw i16 %load1, 42
+ store i16 %add, i16* @g16
+ %cond = icmp slt i16 %add, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @add8_imm_br() nounwind {
+; CHECK-LABEL: add8_imm_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movb $42, %al
+; CHECK-NEXT: addb %al, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB3_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB3_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i8, i8* @g8
+ %add = add nsw i8 %load1, 42
+ store i8 %add, i8* @g8
+ %cond = icmp slt i8 %add, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @add64_reg_br(i64 %arg) nounwind {
+; CHECK-LABEL: add64_reg_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addq %rdi, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB4_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB4_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i64, i64* @g64
+ %add = add nsw i64 %load1, %arg
+ store i64 %add, i64* @g64
+ %cond = icmp slt i64 %add, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @add32_reg_br(i32 %arg) nounwind {
+; CHECK-LABEL: add32_reg_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addl %edi, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB5_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB5_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i32, i32* @g32
+ %add = add nsw i32 %load1, %arg
+ store i32 %add, i32* @g32
+ %cond = icmp slt i32 %add, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @add16_reg_br(i16 %arg) nounwind {
+; CHECK-LABEL: add16_reg_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addw %di, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB6_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB6_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i16, i16* @g16
+ %add = add nsw i16 %load1, %arg
+ store i16 %add, i16* @g16
+ %cond = icmp slt i16 %add, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @add8_reg_br(i8 %arg) nounwind {
+; CHECK-LABEL: add8_reg_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addb %dil, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB7_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB7_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i8, i8* @g8
+ %add = add nsw i8 %load1, %arg
+ store i8 %add, i8* @g8
+ %cond = icmp slt i8 %add, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @sub64_imm_br() nounwind {
+; CHECK-LABEL: sub64_imm_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movq $-42, %rax
+; CHECK-NEXT: addq %rax, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB8_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB8_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i64, i64* @g64
+ %sub = sub nsw i64 %load1, 42
+ store i64 %sub, i64* @g64
+ %cond = icmp slt i64 %sub, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @sub32_imm_br() nounwind {
+; CHECK-LABEL: sub32_imm_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movl $-42, %eax
+; CHECK-NEXT: addl %eax, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB9_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB9_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i32, i32* @g32
+ %sub = sub nsw i32 %load1, 42
+ store i32 %sub, i32* @g32
+ %cond = icmp slt i32 %sub, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @sub16_imm_br() nounwind {
+; CHECK-LABEL: sub16_imm_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movw $-42, %ax
+; CHECK-NEXT: addw %ax, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB10_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB10_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i16, i16* @g16
+ %sub = sub nsw i16 %load1, 42
+ store i16 %sub, i16* @g16
+ %cond = icmp slt i16 %sub, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @sub8_imm_br() nounwind {
+; CHECK-LABEL: sub8_imm_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movb $-42, %al
+; CHECK-NEXT: addb %al, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB11_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB11_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i8, i8* @g8
+ %sub = sub nsw i8 %load1, 42
+ store i8 %sub, i8* @g8
+ %cond = icmp slt i8 %sub, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @sub64_reg_br(i64 %arg) nounwind {
+; CHECK-LABEL: sub64_reg_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: subq %rdi, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB12_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB12_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i64, i64* @g64
+ %sub = sub nsw i64 %load1, %arg
+ store i64 %sub, i64* @g64
+ %cond = icmp slt i64 %sub, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @sub32_reg_br(i32 %arg) nounwind {
+; CHECK-LABEL: sub32_reg_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: subl %edi, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB13_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB13_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i32, i32* @g32
+ %sub = sub nsw i32 %load1, %arg
+ store i32 %sub, i32* @g32
+ %cond = icmp slt i32 %sub, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @sub16_reg_br(i16 %arg) nounwind {
+; CHECK-LABEL: sub16_reg_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: subw %di, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB14_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB14_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i16, i16* @g16
+ %sub = sub nsw i16 %load1, %arg
+ store i16 %sub, i16* @g16
+ %cond = icmp slt i16 %sub, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @sub8_reg_br(i8 %arg) nounwind {
+; CHECK-LABEL: sub8_reg_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: subb %dil, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB15_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB15_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i8, i8* @g8
+ %sub = sub nsw i8 %load1, %arg
+ store i8 %sub, i8* @g8
+ %cond = icmp slt i8 %sub, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}