  if (!CmpLHS.hasOneUse())
    return SDValue();
-  auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS);
-  if (!CmpRHSC || CmpRHSC->getZExtValue() != 0)
-    return SDValue();
-
-  const unsigned Opc = CmpLHS.getOpcode();
-
+  unsigned Opc = CmpLHS.getOpcode();
  if (Opc != ISD::ATOMIC_LOAD_ADD && Opc != ISD::ATOMIC_LOAD_SUB)
    return SDValue();
  if (Opc == ISD::ATOMIC_LOAD_SUB)
    Addend = -Addend;
+  auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS);
+  if (!CmpRHSC)
+    return SDValue();
+
+  APInt Comparison = CmpRHSC->getAPIntValue();
+
+  // If the addend is the negation of the comparison value, then we can do
+  // a full comparison by emitting the atomic arithmetic as a locked sub.
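+  // For example, comparing the result of an atomic sub of 1 against 1 for
+  // equality becomes a locked dec/sub whose ZF feeds the setcc directly.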
+  if (Comparison == -Addend) {
+    // The CC is fine, but we need to rewrite the LHS of the comparison as an
+    // atomic sub.
+    auto *AN = cast<AtomicSDNode>(CmpLHS.getNode());
+    auto AtomicSub = DAG.getAtomic(
+        ISD::ATOMIC_LOAD_SUB, SDLoc(CmpLHS), CmpLHS.getValueType(),
+        /*Chain*/ CmpLHS.getOperand(0), /*LHS*/ CmpLHS.getOperand(1),
+        /*RHS*/ DAG.getConstant(-Addend, SDLoc(CmpRHS), CmpRHS.getValueType()),
+        AN->getMemOperand());
+    auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG);
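+    // The compare being combined was the only user of the atomic op's value
+    // result (checked above), so that result is dead: replace it with UNDEF
+    // and re-thread the chain through the locked op.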
+    DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0),
+                                  DAG.getUNDEF(CmpLHS.getValueType()));
+    DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
+    return LockOp;
+  }
+
+  // We can handle comparisons with zero in a number of cases by manipulating
+  // the CC used.
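+  // For example, when the atomic op adds 1, testing 'x < 0' (COND_S) on the
+  // original value is equivalent to testing 'x + 1 <= 0' (COND_LE) on the
+  // flags of the locked add, which is what the code below does.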
+  if (!Comparison.isNullValue())
+    return SDValue();
+
  if (CC == X86::COND_S && Addend == 1)
    CC = X86::COND_LE;
  else if (CC == X86::COND_NS && Addend == 1)
  ret i8 %s2
}
+define i8 @test_sub_1_cmp_1_setcc_eq(i64* %p) #0 {
+; CHECK-LABEL: test_sub_1_cmp_1_setcc_eq:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: lock decq (%rdi)
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+entry:
+  %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
+  %tmp1 = icmp eq i64 %tmp0, 1
+  %tmp2 = zext i1 %tmp1 to i8
+  ret i8 %tmp2
+}
+
+define i8 @test_sub_1_cmp_1_setcc_ne(i64* %p) #0 {
+; CHECK-LABEL: test_sub_1_cmp_1_setcc_ne:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: lock decq (%rdi)
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+entry:
+  %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
+  %tmp1 = icmp ne i64 %tmp0, 1
+  %tmp2 = zext i1 %tmp1 to i8
+  ret i8 %tmp2
+}
+
+define i8 @test_sub_1_cmp_1_setcc_ugt(i64* %p) #0 {
+; CHECK-LABEL: test_sub_1_cmp_1_setcc_ugt:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: lock decq (%rdi)
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: retq
+entry:
+  %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
+  %tmp1 = icmp ugt i64 %tmp0, 1
+  %tmp2 = zext i1 %tmp1 to i8
+  ret i8 %tmp2
+}
+
+; FIXME: This comparison is canonicalized in a way that hides the fact that
+; it could still be folded into the atomic subtract.
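+; The 'icmp sle %tmp0, 1' is rewritten as a signed 'less than 2' compare (see
+; the 'cmpq $2' / 'setl' below), so the constant no longer matches the negated
+; addend.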
+define i8 @test_sub_1_cmp_1_setcc_sle(i64* %p) #0 {
+; CHECK-LABEL: test_sub_1_cmp_1_setcc_sle:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movq $-1, %rax
+; CHECK-NEXT: lock xaddq %rax, (%rdi)
+; CHECK-NEXT: cmpq $2, %rax
+; CHECK-NEXT: setl %al
+; CHECK-NEXT: retq
+entry:
+  %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
+  %tmp1 = icmp sle i64 %tmp0, 1
+  %tmp2 = zext i1 %tmp1 to i8
+  ret i8 %tmp2
+}
+
+define i8 @test_sub_3_cmp_3_setcc_eq(i64* %p) #0 {
+; CHECK-LABEL: test_sub_3_cmp_3_setcc_eq:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: lock subq $3, (%rdi)
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+entry:
+  %tmp0 = atomicrmw sub i64* %p, i64 3 seq_cst
+  %tmp1 = icmp eq i64 %tmp0, 3
+  %tmp2 = zext i1 %tmp1 to i8
+  ret i8 %tmp2
+}
+
+; FIXME: This comparison is canonicalized in a way that hides the fact that
+; it could still be folded into the atomic subtract.
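+; The 'icmp uge %tmp0, 3' is rewritten as an unsigned 'greater than 2' compare
+; (see the 'cmpq $2' / 'seta' below), so the constant no longer matches the
+; negated addend.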
+define i8 @test_sub_3_cmp_3_setcc_uge(i64* %p) #0 {
+; CHECK-LABEL: test_sub_3_cmp_3_setcc_uge:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movq $-3, %rax
+; CHECK-NEXT: lock xaddq %rax, (%rdi)
+; CHECK-NEXT: cmpq $2, %rax
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: retq
+entry:
+  %tmp0 = atomicrmw sub i64* %p, i64 3 seq_cst
+  %tmp1 = icmp uge i64 %tmp0, 3
+  %tmp2 = zext i1 %tmp1 to i8
+  ret i8 %tmp2
+}
+
attributes #0 = { nounwind }