From 5811c15d6be769c45103ee1d3282d84d7dafca4d Mon Sep 17 00:00:00 2001
From: Hans Wennborg
Date: Wed, 11 Jan 2017 01:36:57 +0000
Subject: [PATCH] Re-commit r289955: [X86] Fold (setcc (cmp (atomic_load_add
 x, -C) C), COND) to (setcc (LADD x, -C), COND) (PR31367)

This was reverted because it would miscompile code where the cmp had
multiple uses. That was due to a deficiency in the existing code, which
was fixed in r291630 (see the PR for details).

This re-commit includes an extra test for the kind of code that got
miscompiled: @test_sub_1_setcc_jcc.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291640 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp      | 32 +++++++++----
 test/CodeGen/X86/atomic-eflags-reuse.ll | 64 +++++++++++++++++++++++++
 2 files changed, 86 insertions(+), 10 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index fac72049587..84c51182739 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -29391,11 +29391,19 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-/// Combine:
+/// Combine brcond/cmov/setcc/.. based on comparing the result of
+/// atomic_load_add to use EFLAGS produced by the addition
+/// directly if possible. For example:
+///
+/// (setcc (cmp (atomic_load_add x, -C) C), COND_E)
+/// becomes:
+/// (setcc (LADD x, -C), COND_E)
+///
+/// and
 /// (brcond/cmov/setcc .., (cmp (atomic_load_add x, 1), 0), COND_S)
-/// to:
+/// becomes:
 /// (brcond/cmov/setcc .., (LADD x, 1), COND_LE)
-/// i.e., reusing the EFLAGS produced by the LOCKed instruction.
+///
 /// Note that this is only legal for some op/cc combinations.
 static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
                                        SelectionDAG &DAG) {
@@ -29410,7 +29418,7 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
   if (!Cmp.hasOneUse())
     return SDValue();
 
-  // This only applies to variations of the common case:
+  // This applies to variations of the common case:
   //   (icmp slt x, 0) -> (icmp sle (add x, 1), 0)
   //   (icmp sge x, 0) -> (icmp sgt (add x, 1), 0)
   //   (icmp sle x, 0) -> (icmp slt (sub x, 1), 0)
@@ -29429,8 +29437,9 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
     return SDValue();
 
   auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS);
-  if (!CmpRHSC || CmpRHSC->getZExtValue() != 0)
+  if (!CmpRHSC)
     return SDValue();
+  APInt Comparand = CmpRHSC->getAPIntValue();
 
   const unsigned Opc = CmpLHS.getOpcode();
 
@@ -29446,16 +29455,19 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
   if (Opc == ISD::ATOMIC_LOAD_SUB)
     Addend = -Addend;
 
-  if (CC == X86::COND_S && Addend == 1)
+  if (Comparand == -Addend) {
+    // No change to CC.
+  } else if (CC == X86::COND_S && Comparand == 0 && Addend == 1) {
     CC = X86::COND_LE;
-  else if (CC == X86::COND_NS && Addend == 1)
+  } else if (CC == X86::COND_NS && Comparand == 0 && Addend == 1) {
     CC = X86::COND_G;
-  else if (CC == X86::COND_G && Addend == -1)
+  } else if (CC == X86::COND_G && Comparand == 0 && Addend == -1) {
     CC = X86::COND_GE;
-  else if (CC == X86::COND_LE && Addend == -1)
+  } else if (CC == X86::COND_LE && Comparand == 0 && Addend == -1) {
     CC = X86::COND_L;
-  else
+  } else {
     return SDValue();
+  }
 
   SDValue LockOp = lowerAtomicArithWithLOCK(CmpLHS, DAG);
   DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0),
diff --git a/test/CodeGen/X86/atomic-eflags-reuse.ll b/test/CodeGen/X86/atomic-eflags-reuse.ll
index 9521a2afefc..9902325fd14 100644
--- a/test/CodeGen/X86/atomic-eflags-reuse.ll
+++ b/test/CodeGen/X86/atomic-eflags-reuse.ll
@@ -192,4 +192,68 @@ entry:
   ret i8 %s2
 }
 
+define i8 @test_sub_1_setcc_eq(i64* %p) #0 {
+; CHECK-LABEL: test_sub_1_setcc_eq:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: lock decq (%rdi)
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+entry:
+  %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
+  %tmp1 = icmp eq i64 %tmp0, 1
+  %tmp2 = zext i1 %tmp1 to i8
+  ret i8 %tmp2
+}
+
+define i8 @test_add_5_setcc_ne(i64* %p) #0 {
+; CHECK-LABEL: test_add_5_setcc_ne:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: lock addq $5, (%rdi)
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+entry:
+  %tmp0 = atomicrmw add i64* %p, i64 5 seq_cst
+  %tmp1 = icmp ne i64 %tmp0, -5
+  %tmp2 = zext i1 %tmp1 to i8
+  ret i8 %tmp2
+}
+
+define i8 @test_add_5_setcc_ne_comparand_mismatch(i64* %p) #0 {
+; CHECK-LABEL: test_add_5_setcc_ne_comparand_mismatch:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movl $5, %eax
+; CHECK-NEXT: lock xaddq %rax, (%rdi)
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+entry:
+  %tmp0 = atomicrmw add i64* %p, i64 5 seq_cst
+  %tmp1 = icmp ne i64 %tmp0, 0
+  %tmp2 = zext i1 %tmp1 to i8
+  ret i8 %tmp2
+}
+
+declare void @g()
+define zeroext i1 @test_sub_1_setcc_jcc(i64* %p) local_unnamed_addr #0 {
+; TODO: It's possible to use "lock dec" here, but both uses of the cmp need to
+; be updated.
+; CHECK-LABEL: test_sub_1_setcc_jcc:
+; CHECK: # BB#0: # %entry
+; CHECK: movq $-1, %rax
+; CHECK-NEXT: lock xaddq %rax, (%rdi)
+; CHECK-NEXT: cmpq $1, %rax
+; CHECK-NEXT: sete %bl
+; CHECK-NEXT: jne
+entry:
+  %add = atomicrmw volatile add i64* %p, i64 -1 seq_cst
+  %cmp = icmp ne i64 %add, 1
+  %not = xor i1 %cmp, true
+  br i1 %cmp, label %else, label %then
+then:
+  tail call void @g()
+  br label %else
+else:
+  ret i1 %not
+}
+
 attributes #0 = { nounwind }
-- 
2.40.0
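For context, here is a minimal C++ sketch (hypothetical, not part of the patch; the function name and types are chosen only for illustration) of the kind of source that lowers to the new @test_sub_1_setcc_eq pattern. fetch_sub returns the value before the subtraction, so the comparand (1) equals the negated addend; that is the Comparand == -Addend case added above, and per the test's CHECK lines the comparison can then reuse EFLAGS from "lock decq" instead of a separate cmp/test.

    // Hypothetical reference-count release (illustration only).
    #include <atomic>
    #include <cstdint>

    bool release(std::atomic<int64_t> &refcount) {
      // fetch_sub returns the previous value, so the last reference is
      // dropped when the previous value was 1. This typically lowers to
      //   %old  = atomicrmw sub i64* %p, i64 1 seq_cst
      //   %dead = icmp eq i64 %old, 1
      // i.e. the @test_sub_1_setcc_eq pattern in the test above.
      return refcount.fetch_sub(1, std::memory_order_seq_cst) == 1;
    }

When the comparand does not match the negated addend and none of the sign-flag special cases apply (as in @test_add_5_setcc_ne_comparand_mismatch), the combine bails out and the usual xadd + test sequence is kept.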