From 5811c15d6be769c45103ee1d3282d84d7dafca4d Mon Sep 17 00:00:00 2001
From: Hans Wennborg
Date: Wed, 11 Jan 2017 01:36:57 +0000
Subject: [PATCH] Re-commit r289955: [X86] Fold (setcc (cmp (atomic_load_add
 x, -C) C), COND) to (setcc (LADD x, -C), COND) (PR31367)

This was reverted because it would miscompile code where the cmp had
multiple uses. That was due to a deficiency in the existing code, which
was fixed in r291630 (see the PR for details).

This re-commit includes an extra test for the kind of code that got
miscompiled: @test_sub_1_setcc_jcc.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291640 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp      | 32 +++++++++----
 test/CodeGen/X86/atomic-eflags-reuse.ll | 64 +++++++++++++++++++++++++
 2 files changed, 86 insertions(+), 10 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index fac72049587..84c51182739 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -29391,11 +29391,19 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-/// Combine:
+/// Combine brcond/cmov/setcc/.. based on comparing the result of
+/// atomic_load_add to use EFLAGS produced by the addition
+/// directly if possible. For example:
+///
+/// (setcc (cmp (atomic_load_add x, -C) C), COND_E)
+/// becomes:
+/// (setcc (LADD x, -C), COND_E)
+///
+/// and
 /// (brcond/cmov/setcc .., (cmp (atomic_load_add x, 1), 0), COND_S)
-/// to:
+/// becomes:
 /// (brcond/cmov/setcc .., (LADD x, 1), COND_LE)
-/// i.e., reusing the EFLAGS produced by the LOCKed instruction.
+///
 /// Note that this is only legal for some op/cc combinations.
 static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
                                        SelectionDAG &DAG) {
@@ -29410,7 +29418,7 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
   if (!Cmp.hasOneUse())
     return SDValue();
 
-  // This only applies to variations of the common case:
+  // This applies to variations of the common case:
   //   (icmp slt x, 0) -> (icmp sle (add x, 1), 0)
   //   (icmp sge x, 0) -> (icmp sgt (add x, 1), 0)
   //   (icmp sle x, 0) -> (icmp slt (sub x, 1), 0)
@@ -29429,8 +29437,9 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
     return SDValue();
 
   auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS);
-  if (!CmpRHSC || CmpRHSC->getZExtValue() != 0)
+  if (!CmpRHSC)
     return SDValue();
+  APInt Comparand = CmpRHSC->getAPIntValue();
 
   const unsigned Opc = CmpLHS.getOpcode();
 
@@ -29446,16 +29455,19 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
   if (Opc == ISD::ATOMIC_LOAD_SUB)
     Addend = -Addend;
 
-  if (CC == X86::COND_S && Addend == 1)
+  if (Comparand == -Addend) {
+    // No change to CC.
+  } else if (CC == X86::COND_S && Comparand == 0 && Addend == 1) {
     CC = X86::COND_LE;
-  else if (CC == X86::COND_NS && Addend == 1)
+  } else if (CC == X86::COND_NS && Comparand == 0 && Addend == 1) {
     CC = X86::COND_G;
-  else if (CC == X86::COND_G && Addend == -1)
+  } else if (CC == X86::COND_G && Comparand == 0 && Addend == -1) {
     CC = X86::COND_GE;
-  else if (CC == X86::COND_LE && Addend == -1)
+  } else if (CC == X86::COND_LE && Comparand == 0 && Addend == -1) {
     CC = X86::COND_L;
-  else
+  } else {
     return SDValue();
+  }
 
   SDValue LockOp = lowerAtomicArithWithLOCK(CmpLHS, DAG);
   DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0),
diff --git a/test/CodeGen/X86/atomic-eflags-reuse.ll b/test/CodeGen/X86/atomic-eflags-reuse.ll
index 9521a2afefc..9902325fd14 100644
--- a/test/CodeGen/X86/atomic-eflags-reuse.ll
+++ b/test/CodeGen/X86/atomic-eflags-reuse.ll
@@ -192,4 +192,68 @@ entry:
   ret i8 %s2
 }
 
+define i8 @test_sub_1_setcc_eq(i64* %p) #0 {
+; CHECK-LABEL: test_sub_1_setcc_eq:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: lock decq (%rdi)
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+entry:
+  %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
+  %tmp1 = icmp eq i64 %tmp0, 1
+  %tmp2 = zext i1 %tmp1 to i8
+  ret i8 %tmp2
+}
+
+define i8 @test_add_5_setcc_ne(i64* %p) #0 {
+; CHECK-LABEL: test_add_5_setcc_ne:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: lock addq $5, (%rdi)
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+entry:
+  %tmp0 = atomicrmw add i64* %p, i64 5 seq_cst
+  %tmp1 = icmp ne i64 %tmp0, -5
+  %tmp2 = zext i1 %tmp1 to i8
+  ret i8 %tmp2
+}
+
+define i8 @test_add_5_setcc_ne_comparand_mismatch(i64* %p) #0 {
+; CHECK-LABEL: test_add_5_setcc_ne_comparand_mismatch:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movl $5, %eax
+; CHECK-NEXT: lock xaddq %rax, (%rdi)
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+entry:
+  %tmp0 = atomicrmw add i64* %p, i64 5 seq_cst
+  %tmp1 = icmp ne i64 %tmp0, 0
+  %tmp2 = zext i1 %tmp1 to i8
+  ret i8 %tmp2
+}
+
+declare void @g()
+define zeroext i1 @test_sub_1_setcc_jcc(i64* %p) local_unnamed_addr #0 {
+; TODO: It's possible to use "lock dec" here, but both uses of the cmp need to
+; be updated.
+; CHECK-LABEL: test_sub_1_setcc_jcc:
+; CHECK: # BB#0: # %entry
+; CHECK: movq $-1, %rax
+; CHECK-NEXT: lock xaddq %rax, (%rdi)
+; CHECK-NEXT: cmpq $1, %rax
+; CHECK-NEXT: sete %bl
+; CHECK-NEXT: jne
+entry:
+  %add = atomicrmw volatile add i64* %p, i64 -1 seq_cst
+  %cmp = icmp ne i64 %add, 1
+  %not = xor i1 %cmp, true
+  br i1 %cmp, label %else, label %then
+then:
+  tail call void @g()
+  br label %else
+else:
+  ret i1 %not
+}
+
 attributes #0 = { nounwind }
-- 
2.40.0
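For context, here is a minimal C++ sketch (hypothetical, not part of the patch; the function name and types are chosen only for illustration) of the kind of source that lowers to the new @test_sub_1_setcc_eq pattern. fetch_sub returns the value before the subtraction, so the comparand (1) equals the negated addend; that is the Comparand == -Addend case added above, and per the test's CHECK lines the comparison can then reuse EFLAGS from "lock decq" instead of a separate cmp/test.

    // Hypothetical reference-count release (illustration only).
    #include <atomic>
    #include <cstdint>

    bool release(std::atomic<int64_t> &refcount) {
      // fetch_sub returns the previous value, so the last reference is
      // dropped when the previous value was 1. This typically lowers to
      //   %old  = atomicrmw sub i64* %p, i64 1 seq_cst
      //   %dead = icmp eq i64 %old, 1
      // i.e. the @test_sub_1_setcc_eq pattern in the test above.
      return refcount.fetch_sub(1, std::memory_order_seq_cst) == 1;
    }

When the comparand does not match the negated addend and none of the sign-flag special cases apply (as in @test_add_5_setcc_ne_comparand_mismatch), the combine bails out and the usual xadd + test sequence is kept.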