From: Sanjay Patel <spatel@rotateright.com>
Date: Wed, 5 Apr 2017 14:09:39 +0000 (+0000)
Subject: [DAGCombiner] add and use TLI hook to convert and-of-seteq / or-of-setne to bitwise... 
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a070c921e55a25ba93481c0c0e0339e864c450ab;p=llvm

[DAGCombiner] add and use TLI hook to convert and-of-seteq / or-of-setne to bitwise logic+setcc (PR32401)

This is a generic combine enabled via target hook to reduce icmp logic as discussed in:
https://bugs.llvm.org/show_bug.cgi?id=32401

It's likely that other targets will want to enable this hook for scalar transforms,
and there are probably other patterns that can use bitwise logic to reduce comparisons.

Note that we are missing an IR canonicalization for these patterns, and we will probably
prefer the pair-of-compares form in IR (shorter, more likely to fold).

Differential Revision: https://reviews.llvm.org/D31483


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@299542 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 97e78d60844..240896a538f 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -437,6 +437,15 @@ public:
     return false;
   }
 
+  /// Use bitwise logic to make pairs of compares more efficient. For example:
+  /// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
+  /// This should be true when it takes more than one instruction to lower
+  /// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on
+  /// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win.
+  virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const {
+    return false;
+  }
+
   /// Return the preferred operand type if the target has a quick way to compare
   /// integer values of the given size. Assume that any legal integer type can
   /// be compared efficiently. Targets may override this to allow illegal wide
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0b4fb90bc8c..c9e8a77f03a 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3255,6 +3255,21 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
   }
 
+  // Try more general transforms if the predicates match and the only user of
+  // the compares is the 'and' or 'or'.
+  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
+      N0.hasOneUse() && N1.hasOneUse()) {
+    // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
+    // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
+    if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
+      SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
+      SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
+      SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
+      SDValue Zero = DAG.getConstant(0, DL, OpVT);
+      return DAG.getSetCC(DL, VT, Or, Zero, CC1);
+    }
+  }
+
   // Canonicalize equivalent operands to LL == RL.
   if (LL == RR && LR == RL) {
     CC1 = ISD::getSetCCSwappedOperands(CC1);
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index ad632427470..198000e5b5b 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -515,6 +515,10 @@ class InstrItineraryData;
     bool isCheapToSpeculateCttz() const override;
     bool isCheapToSpeculateCtlz() const override;
 
+    bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
+      return VT.isScalarInteger();
+    }
+
     bool supportSwiftError() const override {
       return true;
     }
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 3dc9a9f184c..6113eb58f42 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -531,6 +531,10 @@ namespace llvm {
       return true;
     }
 
+    bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
+      return VT.isScalarInteger();
+    }
+
     bool supportSplitCSR(MachineFunction *MF) const override {
       return
         MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 13642c219e8..ab4910daca0 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -814,6 +814,10 @@ namespace llvm {
 
     bool hasAndNotCompare(SDValue Y) const override;
 
+    bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
+      return VT.isScalarInteger();
+    }
+
     /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
     MVT hasFastEqualityCompare(unsigned NumBits) const override;
 
diff --git a/test/CodeGen/ARM/setcc-logic.ll b/test/CodeGen/ARM/setcc-logic.ll
index bfd188fb10d..79bae1facb3 100644
--- a/test/CodeGen/ARM/setcc-logic.ll
+++ b/test/CodeGen/ARM/setcc-logic.ll
@@ -20,13 +20,11 @@ define zeroext i1 @ne_neg1_and_ne_zero(i32 %x) nounwind {
 define zeroext i1 @and_eq(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
 ; CHECK-LABEL: and_eq:
 ; CHECK:       @ BB#0:
-; CHECK-NEXT:    cmp r2, r3
-; CHECK-NEXT:    mov r2, #0
-; CHECK-NEXT:    movweq r2, #1
-; CHECK-NEXT:    mov r12, #0
-; CHECK-NEXT:    cmp r0, r1
-; CHECK-NEXT:    movweq r12, #1
-; CHECK-NEXT:    and r0, r12, r2
+; CHECK-NEXT:    eor r2, r2, r3
+; CHECK-NEXT:    eor r0, r0, r1
+; CHECK-NEXT:    orrs r0, r0, r2
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movweq r0, #1
 ; CHECK-NEXT:    bx lr
   %cmp1 = icmp eq i32 %a, %b
   %cmp2 = icmp eq i32 %c, %d
@@ -37,13 +35,10 @@ define zeroext i1 @and_eq(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
 define zeroext i1 @or_ne(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
 ; CHECK-LABEL: or_ne:
 ; CHECK:       @ BB#0:
-; CHECK-NEXT:    cmp r2, r3
-; CHECK-NEXT:    mov r2, #0
-; CHECK-NEXT:    movwne r2, #1
-; CHECK-NEXT:    mov r12, #0
-; CHECK-NEXT:    cmp r0, r1
-; CHECK-NEXT:    movwne r12, #1
-; CHECK-NEXT:    orr r0, r12, r2
+; CHECK-NEXT:    eor r2, r2, r3
+; CHECK-NEXT:    eor r0, r0, r1
+; CHECK-NEXT:    orrs r0, r0, r2
+; CHECK-NEXT:    movwne r0, #1
 ; CHECK-NEXT:    bx lr
   %cmp1 = icmp ne i32 %a, %b
   %cmp2 = icmp ne i32 %c, %d
diff --git a/test/CodeGen/PowerPC/setcc-logic.ll b/test/CodeGen/PowerPC/setcc-logic.ll
index 09cc1063ffd..2ed08e2ae38 100644
--- a/test/CodeGen/PowerPC/setcc-logic.ll
+++ b/test/CodeGen/PowerPC/setcc-logic.ll
@@ -433,11 +433,11 @@ define zeroext i1 @ne_neg1_and_ne_zero(i64 %x) {
 define zeroext i1 @and_eq(i16 zeroext  %a, i16 zeroext %b, i16 zeroext %c, i16 zeroext %d) {
 ; CHECK-LABEL: and_eq:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    cmpw 0, 3, 4
-; CHECK-NEXT:    cmpw 1, 5, 6
-; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:    crnand 20, 2, 6
-; CHECK-NEXT:    isel 3, 0, 3, 20
+; CHECK-NEXT:    xor 5, 5, 6
+; CHECK-NEXT:    xor 3, 3, 4
+; CHECK-NEXT:    or 3, 3, 5
+; CHECK-NEXT:    cntlzw 3, 3
+; CHECK-NEXT:    rlwinm 3, 3, 27, 31, 31
 ; CHECK-NEXT:    blr
   %cmp1 = icmp eq i16 %a, %b
   %cmp2 = icmp eq i16 %c, %d
@@ -448,11 +448,12 @@ define zeroext i1 @and_eq(i16 zeroext  %a, i16 zeroext %b, i16 zeroext %c, i16 z
 define zeroext i1 @or_ne(i32 %a, i32 %b, i32 %c, i32 %d) {
 ; CHECK-LABEL: or_ne:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    cmpw 0, 3, 4
-; CHECK-NEXT:    cmpw 1, 5, 6
-; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:    crand 20, 6, 2
-; CHECK-NEXT:    isel 3, 0, 3, 20
+; CHECK-NEXT:    xor 5, 5, 6
+; CHECK-NEXT:    xor 3, 3, 4
+; CHECK-NEXT:    or 3, 3, 5
+; CHECK-NEXT:    cntlzw 3, 3
+; CHECK-NEXT:    nor 3, 3, 3
+; CHECK-NEXT:    rlwinm 3, 3, 27, 31, 31
 ; CHECK-NEXT:    blr
   %cmp1 = icmp ne i32 %a, %b
   %cmp2 = icmp ne i32 %c, %d
diff --git a/test/CodeGen/X86/avx512-cmp.ll b/test/CodeGen/X86/avx512-cmp.ll
index fcfb9955b5b..c1b64743f89 100644
--- a/test/CodeGen/X86/avx512-cmp.ll
+++ b/test/CodeGen/X86/avx512-cmp.ll
@@ -120,12 +120,12 @@ entry:
 define i32 @test8(i32 %a1, i32 %a2, i32 %a3) {
 ; ALL-LABEL: test8:
 ; ALL:       ## BB#0:
+; ALL-NEXT:    notl %edi
+; ALL-NEXT:    xorl $-2147483648, %esi ## imm = 0x80000000
 ; ALL-NEXT:    testl %edx, %edx
 ; ALL-NEXT:    movl $1, %eax
 ; ALL-NEXT:    cmovel %eax, %edx
-; ALL-NEXT:    cmpl $-2147483648, %esi ## imm = 0x80000000
-; ALL-NEXT:    cmovnel %edx, %eax
-; ALL-NEXT:    cmpl $-1, %edi
+; ALL-NEXT:    orl %edi, %esi
 ; ALL-NEXT:    cmovnel %edx, %eax
 ; ALL-NEXT:    retq
   %tmp1 = icmp eq i32 %a1, -1
diff --git a/test/CodeGen/X86/setcc-logic.ll b/test/CodeGen/X86/setcc-logic.ll
index 8e6c149667c..4d1e5ba1654 100644
--- a/test/CodeGen/X86/setcc-logic.ll
+++ b/test/CodeGen/X86/setcc-logic.ll
@@ -440,11 +440,10 @@ define zeroext i1 @ne_neg1_and_ne_zero(i64 %x) nounwind {
 define zeroext i1 @and_eq(i8 %a, i8 %b, i8 %c, i8 %d) nounwind {
 ; CHECK-LABEL: and_eq:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    cmpb %sil, %dil
-; CHECK-NEXT:    sete %sil
-; CHECK-NEXT:    cmpb %cl, %dl
+; CHECK-NEXT:    xorl %esi, %edi
+; CHECK-NEXT:    xorl %ecx, %edx
+; CHECK-NEXT:    orb %dl, %dil
 ; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    andb %sil, %al
 ; CHECK-NEXT:    retq
   %cmp1 = icmp eq i8 %a, %b
   %cmp2 = icmp eq i8 %c, %d
@@ -455,11 +454,10 @@ define zeroext i1 @and_eq(i8 %a, i8 %b, i8 %c, i8 %d) nounwind {
 define zeroext i1 @or_ne(i8 %a, i8 %b, i8 %c, i8 %d) nounwind {
 ; CHECK-LABEL: or_ne:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    cmpb %sil, %dil
-; CHECK-NEXT:    setne %sil
-; CHECK-NEXT:    cmpb %cl, %dl
+; CHECK-NEXT:    xorl %esi, %edi
+; CHECK-NEXT:    xorl %ecx, %edx
+; CHECK-NEXT:    orb %dl, %dil
 ; CHECK-NEXT:    setne %al
-; CHECK-NEXT:    orb %sil, %al
 ; CHECK-NEXT:    retq
   %cmp1 = icmp ne i8 %a, %b
   %cmp2 = icmp ne i8 %c, %d