return true;
}
-/// Get the appropriate X86 opcode for an in-memory arithmetic operation that
-/// also sets flags.
-///
-/// FIXME: This is essentially re-implemneting a subset of the patterns for
-/// these instructions. Instead, we should compute this from the patterns
-/// somehow.
-///
-/// FIXME: Currently we only support integer operations.
-///
-/// If there is no X86 opcode, returns none.
-static Optional<unsigned> getFusedLdStWithFlagsOpcode(EVT LdVT, unsigned Opc) {
- auto SelectSize = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
- unsigned Opc8) -> Optional<unsigned> {
- switch (LdVT.getSimpleVT().SimpleTy) {
- case MVT::i64:
- return Opc64;
- case MVT::i32:
- return Opc32;
- case MVT::i16:
- return Opc16;
- case MVT::i8:
- return Opc8;
- default:
- return None;
- }
- };
- switch (Opc) {
- default:
- return None;
- case X86ISD::DEC:
- return SelectSize(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
- case X86ISD::INC:
- return SelectSize(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m);
- }
-}
-
/// Check whether or not the chain ending in StoreNode is suitable for doing
/// the {load; op; store} to modify transformation.
static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
return true;
}
-// Change a chain of {load; incr or dec; store} of the same value into
-// a simple increment or decrement through memory of that value, if the
-// uses of the modified value and its address are suitable.
-// The DEC64m tablegen pattern is currently not able to match the case where
-// the EFLAGS on the original DEC are used. (This also applies to
-// {INC,DEC}X{64,32,16,8}.)
-// We'll need to improve tablegen to allow flags to be transferred from a
-// node in the pattern to the result node. probably with a new keyword
-// for example, we have this
+// Change a chain of {load; op; store} of the same value into a simple op
+// through memory of that value, if the uses of the modified value and its
+// address are suitable.
+//
+// The tablegen memory operand patterns are currently not able to match the
+// case where the EFLAGS on the original operation are used.
+//
+// To move this to tablegen, we'll need to improve tablegen to allow flags to
+// be transferred from a node in the pattern to the result node, probably with
+// a new keyword. For example, we have this
// def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
// [(store (add (loadi64 addr:$dst), -1), addr:$dst),
// (implicit EFLAGS)]>;
// but maybe need something like this
// def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
// [(store (add (loadi64 addr:$dst), -1), addr:$dst),
// (transferrable EFLAGS)]>;
//
-// FIXME: This should handle a wide range of operations which support RMW
-// memory operands, not just inc and dec.
+// Until then, we manually fold these and instruction select the operation
+// here.
bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
SDValue StoredVal = StoreNode->getOperand(1);
unsigned Opc = StoredVal->getOpcode();
+ // Before we try to select anything, make sure this is a memory operand size
+ // and opcode we can handle. Note that this must match the code below that
+ // actually lowers the opcodes.
EVT MemVT = StoreNode->getMemoryVT();
- if (!MemVT.isSimple())
+ if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
+ MemVT != MVT::i8)
return false;
- Optional<unsigned> NewOpc = getFusedLdStWithFlagsOpcode(MemVT, Opc);
- if (!NewOpc)
+ switch (Opc) {
+ default:
return false;
+ case X86ISD::INC:
+ case X86ISD::DEC:
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ break;
+ }
LoadSDNode *LoadNode = nullptr;
SDValue InputChain;
Segment))
return false;
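+ // Helper to pick the opcode variant for the memory VT; the guard above
+ // ensures MemVT is one of the four integer sizes handled here.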
+ auto SelectOpcodeForSize = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
+ unsigned Opc8) {
+ switch (MemVT.getSimpleVT().SimpleTy) {
+ case MVT::i64:
+ return Opc64;
+ case MVT::i32:
+ return Opc32;
+ case MVT::i16:
+ return Opc16;
+ case MVT::i8:
+ return Opc8;
+ default:
+ llvm_unreachable("Invalid size!");
+ }
+ };
+
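+ // Build the RMW machine node. Each of these instructions produces EFLAGS
+ // (modeled as an i32 result) and a chain; the arithmetic result itself is
+ // only written to memory.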
+ MachineSDNode *Result;
+ switch (Opc) {
+ case X86ISD::INC:
+ case X86ISD::DEC: {
+ unsigned NewOpc = Opc == X86ISD::INC
+ ? SelectOpcodeForSize(X86::INC64m, X86::INC32m,
+ X86::INC16m, X86::INC8m)
+ : SelectOpcodeForSize(X86::DEC64m, X86::DEC32m,
+ X86::DEC16m, X86::DEC8m);
+ const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
+ Result =
+ CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
+ break;
+ }
+ case X86ISD::ADD:
+ case X86ISD::SUB: {
+ unsigned NewOpc = Opc == X86ISD::ADD
+ ? SelectOpcodeForSize(X86::ADD64mr, X86::ADD32mr,
+ X86::ADD16mr, X86::ADD8mr)
+ : SelectOpcodeForSize(X86::SUB64mr, X86::SUB32mr,
+ X86::SUB16mr, X86::SUB8mr);
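+ // Unlike INC/DEC, ADD/SUB take the RHS of the original arithmetic as an
+ // explicit register source operand.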
+ const SDValue Ops[] = {Base, Scale, Index,
+ Disp, Segment, StoredVal->getOperand(1),
+ InputChain};
+ Result =
+ CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
+ break;
+ }
+ default:
+ llvm_unreachable("Invalid opcode!");
+ }
+
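+ // The fused instruction both loads and stores, so transfer the memory
+ // operands of both the original load and the original store.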
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
MemOp[0] = StoreNode->getMemOperand();
MemOp[1] = LoadNode->getMemOperand();
- const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
- MachineSDNode *Result =
- CurDAG->getMachineNode(*NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
Result->setMemRefs(MemOp, MemOp + 2);
ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs | FileCheck %s
+
+target triple = "x86_64-unknown-unknown"
+
+@g64 = external global i64, align 8
+@g32 = external global i32, align 4
+@g16 = external global i16, align 2
+@g8 = external global i8, align 1
+
+declare void @a()
+declare void @b()
+
+define void @add64_imm_br() nounwind {
+; CHECK-LABEL: add64_imm_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movl $42, %eax
+; CHECK-NEXT: addq %rax, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB0_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB0_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i64, i64* @g64
+ %add = add nsw i64 %load1, 42
+ store i64 %add, i64* @g64
+ %cond = icmp slt i64 %add, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @add32_imm_br() nounwind {
+; CHECK-LABEL: add32_imm_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movl $42, %eax
+; CHECK-NEXT: addl %eax, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB1_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB1_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i32, i32* @g32
+ %add = add nsw i32 %load1, 42
+ store i32 %add, i32* @g32
+ %cond = icmp slt i32 %add, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @add16_imm_br() nounwind {
+; CHECK-LABEL: add16_imm_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movw $42, %ax
+; CHECK-NEXT: addw %ax, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB2_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB2_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i16, i16* @g16
+ %add = add nsw i16 %load1, 42
+ store i16 %add, i16* @g16
+ %cond = icmp slt i16 %add, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @add8_imm_br() nounwind {
+; CHECK-LABEL: add8_imm_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movb $42, %al
+; CHECK-NEXT: addb %al, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB3_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB3_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i8, i8* @g8
+ %add = add nsw i8 %load1, 42
+ store i8 %add, i8* @g8
+ %cond = icmp slt i8 %add, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @add64_reg_br(i64 %arg) nounwind {
+; CHECK-LABEL: add64_reg_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addq %rdi, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB4_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB4_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i64, i64* @g64
+ %add = add nsw i64 %load1, %arg
+ store i64 %add, i64* @g64
+ %cond = icmp slt i64 %add, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @add32_reg_br(i32 %arg) nounwind {
+; CHECK-LABEL: add32_reg_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addl %edi, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB5_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB5_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i32, i32* @g32
+ %add = add nsw i32 %load1, %arg
+ store i32 %add, i32* @g32
+ %cond = icmp slt i32 %add, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @add16_reg_br(i16 %arg) nounwind {
+; CHECK-LABEL: add16_reg_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addw %di, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB6_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB6_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i16, i16* @g16
+ %add = add nsw i16 %load1, %arg
+ store i16 %add, i16* @g16
+ %cond = icmp slt i16 %add, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @add8_reg_br(i8 %arg) nounwind {
+; CHECK-LABEL: add8_reg_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addb %dil, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB7_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB7_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i8, i8* @g8
+ %add = add nsw i8 %load1, %arg
+ store i8 %add, i8* @g8
+ %cond = icmp slt i8 %add, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @sub64_imm_br() nounwind {
+; CHECK-LABEL: sub64_imm_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movq $-42, %rax
+; CHECK-NEXT: addq %rax, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB8_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB8_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i64, i64* @g64
+ %sub = sub nsw i64 %load1, 42
+ store i64 %sub, i64* @g64
+ %cond = icmp slt i64 %sub, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @sub32_imm_br() nounwind {
+; CHECK-LABEL: sub32_imm_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movl $-42, %eax
+; CHECK-NEXT: addl %eax, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB9_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB9_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i32, i32* @g32
+ %sub = sub nsw i32 %load1, 42
+ store i32 %sub, i32* @g32
+ %cond = icmp slt i32 %sub, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @sub16_imm_br() nounwind {
+; CHECK-LABEL: sub16_imm_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movw $-42, %ax
+; CHECK-NEXT: addw %ax, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB10_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB10_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i16, i16* @g16
+ %sub = sub nsw i16 %load1, 42
+ store i16 %sub, i16* @g16
+ %cond = icmp slt i16 %sub, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @sub8_imm_br() nounwind {
+; CHECK-LABEL: sub8_imm_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movb $-42, %al
+; CHECK-NEXT: addb %al, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB11_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB11_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i8, i8* @g8
+ %sub = sub nsw i8 %load1, 42
+ store i8 %sub, i8* @g8
+ %cond = icmp slt i8 %sub, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @sub64_reg_br(i64 %arg) nounwind {
+; CHECK-LABEL: sub64_reg_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: subq %rdi, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB12_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB12_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i64, i64* @g64
+ %sub = sub nsw i64 %load1, %arg
+ store i64 %sub, i64* @g64
+ %cond = icmp slt i64 %sub, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @sub32_reg_br(i32 %arg) nounwind {
+; CHECK-LABEL: sub32_reg_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: subl %edi, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB13_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB13_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i32, i32* @g32
+ %sub = sub nsw i32 %load1, %arg
+ store i32 %sub, i32* @g32
+ %cond = icmp slt i32 %sub, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @sub16_reg_br(i16 %arg) nounwind {
+; CHECK-LABEL: sub16_reg_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: subw %di, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB14_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB14_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i16, i16* @g16
+ %sub = sub nsw i16 %load1, %arg
+ store i16 %sub, i16* @g16
+ %cond = icmp slt i16 %sub, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}
+
+define void @sub8_reg_br(i8 %arg) nounwind {
+; CHECK-LABEL: sub8_reg_br:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: subb %dil, {{.*}}(%rip)
+; CHECK-NEXT: js .LBB15_1
+; CHECK-NEXT: # BB#2: # %b
+; CHECK-NEXT: jmp b # TAILCALL
+; CHECK-NEXT: .LBB15_1: # %a
+; CHECK-NEXT: jmp a # TAILCALL
+entry:
+ %load1 = load i8, i8* @g8
+ %sub = sub nsw i8 %load1, %arg
+ store i8 %sub, i8* @g8
+ %cond = icmp slt i8 %sub, 0
+ br i1 %cond, label %a, label %b
+
+a:
+ tail call void @a()
+ ret void
+
+b:
+ tail call void @b()
+ ret void
+}