[x86] add folds for x86 vector compare nodes (PR27924)

author Sanjay Patel <spatel@rotateright.com>

Wed, 15 Jun 2016 20:26:58 +0000 (20:26 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Wed, 15 Jun 2016 20:26:58 +0000 (20:26 +0000)
author Sanjay Patel <spatel@rotateright.com>
Wed, 15 Jun 2016 20:26:58 +0000 (20:26 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Wed, 15 Jun 2016 20:26:58 +0000 (20:26 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 2dc71f100d09a61ffa2aa8f164352fdcedd7af62..61b8231eb9f261c088e71bf7d8abe42d3d6bbead 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -30458,6 +30458,22 @@ static SDValue combineTestM(SDNode *N, SelectionDAG &DAG) {
                       Op0->getOperand(0), Op0->getOperand(1));
  }
  
+static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
+                                    const X86Subtarget &Subtarget) {
+  MVT VT = N->getSimpleValueType(0); // Simple value type of N's result 0.
+  SDLoc DL(N);
+  // Fold a compare whose two operands are the same value into a constant.
+  if (N->getOperand(0) == N->getOperand(1)) {
+    if (N->getOpcode() == X86ISD::PCMPEQ) // x == x holds in every lane.
+      return getOnesVector(VT, Subtarget, DAG, DL);
+    if (N->getOpcode() == X86ISD::PCMPGT) // x > x holds in no lane.
+      return getZeroVector(VT, Subtarget, DAG, DL);
+  }
+
+  return SDValue(); // Empty SDValue: no fold was applied to N.
+}
+
+
  SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
    SelectionDAG &DAG = DCI.DAG;
@@ -30538,6 +30554,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
    case ISD::MSCATTER:       return combineGatherScatter(N, DAG);
    case X86ISD::LSUB:        return combineLockSub(N, DAG, Subtarget);
    case X86ISD::TESTM:       return combineTestM(N, DAG);
+  case X86ISD::PCMPEQ:
+  case X86ISD::PCMPGT:      return combineVectorCompare(N, DAG, Subtarget);
    }
  
    return SDValue();
diff --git a/test/CodeGen/X86/vector-compare-combines.ll b/test/CodeGen/X86/vector-compare-combines.ll

index 11272dbdead0045e8f8cf0a4c18d64e1cf9a9cca..c25474d92f9cd7ac570460a3a5b97d1d3e167df6 100644 (file)
--- a/test/CodeGen/X86/vector-compare-combines.ll
+++ b/test/CodeGen/X86/vector-compare-combines.ll
@@ -2,7 +2,7 @@
  ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE42
  ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
  
-; FIXME: If we have SSE/AVX intrinsics in the code, we miss obvious combines
+; If we have SSE/AVX intrinsics in the code, we miss obvious combines
  ; unless we do them late on X86-specific nodes.
  
  declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>)
@@ -10,13 +10,11 @@ declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>)
  define <4 x i32> @PR27924_cmpeq(<4 x i32> %a, <4 x i32> %b) {
  ; SSE-LABEL: PR27924_cmpeq:
  ; SSE:       # BB#0:
-; SSE-NEXT:    pmaxsd %xmm1, %xmm0
  ; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
  ; SSE-NEXT:    retq
  ;
  ; AVX-LABEL: PR27924_cmpeq:
  ; AVX:       # BB#0:
-; AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
  ; AVX-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
  ; AVX-NEXT:    retq
  ;
@@ -31,14 +29,12 @@ define <4 x i32> @PR27924_cmpeq(<4 x i32> %a, <4 x i32> %b) {
  define <4 x i32> @PR27924_cmpgt(<4 x i32> %a, <4 x i32> %b) {
  ; SSE-LABEL: PR27924_cmpgt:
  ; SSE:       # BB#0:
-; SSE-NEXT:    pmaxsd %xmm1, %xmm0
-; SSE-NEXT:    pcmpgtd %xmm0, %xmm0
+; SSE-NEXT:    xorps %xmm0, %xmm0
  ; SSE-NEXT:    retq
  ;
  ; AVX-LABEL: PR27924_cmpgt:
  ; AVX:       # BB#0:
-; AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpcmpgtd %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
  ; AVX-NEXT:    retq
  ;
    %cmp = icmp sgt <4 x i32> %a, %b
author	Sanjay Patel <spatel@rotateright.com>
	Wed, 15 Jun 2016 20:26:58 +0000 (20:26 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Wed, 15 Jun 2016 20:26:58 +0000 (20:26 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
test/CodeGen/X86/vector-compare-combines.ll		patch \| blob \| history