From dc0e67d2a5ed458e10c0a79348fbf66c4f5718e6 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 22 Jun 2017 18:11:19 +0000 Subject: [PATCH] [x86] add/sub (X==0) --> sbb(neg X) Our handling of select-of-constants is lumpy in IR (https://reviews.llvm.org/D24480), lumpy in DAGCombiner, and lumpy in X86ISelLowering. That's why we only had the 'sbb' codegen in 1 out of the 4 tests. This is a step towards smoothing that out. First, show that all of these IR forms are equivalent: http://rise4fun.com/Alive/mx Second, show that the 'sbb' version is faster/smaller. IACA output for SandyBridge (later Intel and AMD chips are similar based on Agner's tables): This is the "obvious" x86 codegen (what gcc appears to produce currently): | Num Of | Ports pressure in cycles | | | Uops | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | | --------------------------------------------------------------------- | 1* | | | | | | | | xor eax, eax | 1 | 1.0 | | | | | | CP | test edi, edi | 1 | | | | | | 1.0 | CP | setnz al | 1 | | 1.0 | | | | | CP | neg eax This is the adc version: | 1* | | | | | | | | xor eax, eax | 1 | 1.0 | | | | | | CP | cmp edi, 0x1 | 2 | | 1.0 | | | | 1.0 | CP | adc eax, 0xffffffff And this is sbb: | 1 | 1.0 | | | | | | | neg edi | 2 | | 1.0 | | | | 1.0 | CP | sbb eax, eax If IACA is trustworthy, then sbb became a single uop in Broadwell, so this will be clearly better than the alternatives going forward. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306040 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 22 +++++++++++++++++++--- test/CodeGen/X86/sbb.ll | 15 ++++++--------- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 679fa81f7f8..2b51137901f 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -34897,13 +34897,29 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) { !Cmp.getOperand(0).getValueType().isInteger()) return SDValue(); - // (cmp Z, 1) sets the carry flag if Z is 0. SDValue Z = Cmp.getOperand(0); + SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32); + + // If X is -1 or 0, then we have an opportunity to avoid constants required by + // the cmp transform below. 'neg' sets the carry flag when Z != 0, so create 0 + // or -1 using 'sbb' with fake operands: + // 0 - (Z != 0) --> sbb %eax, %eax, (neg Z) + // -1 + (Z == 0) --> sbb %eax, %eax, (neg Z) + if (auto *ConstantX = dyn_cast<ConstantSDNode>(X)) { + if ((IsSub && CC == X86::COND_NE && ConstantX->isNullValue()) || + (!IsSub && CC == X86::COND_E && ConstantX->isAllOnesValue())) { + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs, Zero, Z); + return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, + DAG.getConstant(X86::COND_B, DL, MVT::i8), + SDValue(Neg.getNode(), 1)); + } + } + + // (cmp Z, 1) sets the carry flag if Z is 0. 
SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z, DAG.getConstant(1, DL, Z.getValueType())); - SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32); - // X - (Z != 0) --> sub X, (zext(setne Z, 0)) --> adc X, -1, (cmp Z, 1) // X + (Z != 0) --> add X, (zext(setne Z, 0)) --> sbb X, -1, (cmp Z, 1) if (CC == X86::COND_NE) diff --git a/test/CodeGen/X86/sbb.ll b/test/CodeGen/X86/sbb.ll index 6ff207ce35b..062c5d26247 100644 --- a/test/CodeGen/X86/sbb.ll +++ b/test/CodeGen/X86/sbb.ll @@ -8,9 +8,8 @@ define i8 @i8_select_0_or_neg1(i8 %x) { ; CHECK-LABEL: i8_select_0_or_neg1: ; CHECK: # BB#0: -; CHECK-NEXT: cmpb $1, %dil -; CHECK-NEXT: movb $-1, %al -; CHECK-NEXT: adcb $0, %al +; CHECK-NEXT: negb %dil +; CHECK-NEXT: sbbb %al, %al ; CHECK-NEXT: retq %cmp = icmp eq i8 %x, 0 %sel = select i1 %cmp, i8 0, i8 -1 @@ -22,9 +21,8 @@ define i8 @i8_select_0_or_neg1(i8 %x) { define i16 @i16_select_0_or_neg1_as_math(i16 %x) { ; CHECK-LABEL: i16_select_0_or_neg1_as_math: ; CHECK: # BB#0: -; CHECK-NEXT: cmpw $1, %di -; CHECK-NEXT: movw $-1, %ax -; CHECK-NEXT: adcw $0, %ax +; CHECK-NEXT: negw %di +; CHECK-NEXT: sbbw %ax, %ax ; CHECK-NEXT: retq %cmp = icmp eq i16 %x, 0 %ext = zext i1 %cmp to i16 @@ -50,9 +48,8 @@ define i32 @i32_select_0_or_neg1_commuted(i32 %x) { define i64 @i64_select_0_or_neg1_commuted_as_math(i64 %x) { ; CHECK-LABEL: i64_select_0_or_neg1_commuted_as_math: ; CHECK: # BB#0: -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpq $1, %rdi -; CHECK-NEXT: adcq $-1, %rax +; CHECK-NEXT: negq %rdi +; CHECK-NEXT: sbbq %rax, %rax ; CHECK-NEXT: retq %cmp = icmp ne i64 %x, 0 %ext = zext i1 %cmp to i64 -- 2.50.1