From 2df18c8dc0d4b38c9796d2702227b4166d25be0c Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Wed, 15 Jun 2016 20:26:58 +0000
Subject: [PATCH] [x86] add folds for x86 vector compare nodes (PR27924)

Ideally, we can get rid of most x86 LLVM intrinsics by transforming
them to IR (and some of that happened with
http://reviews.llvm.org/rL272807), but it doesn't cost much to have
some simple folds in the backend too while we're working on that and
as a backstop.

This fixes:
https://llvm.org/bugs/show_bug.cgi?id=27924

Differential Revision: http://reviews.llvm.org/D21356

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@272828 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp          | 18 ++++++++++++++++++
 test/CodeGen/X86/vector-compare-combines.ll | 10 +++-------
 2 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 2dc71f100d0..61b8231eb9f 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -30458,6 +30458,22 @@ static SDValue combineTestM(SDNode *N, SelectionDAG &DAG) {
                      Op0->getOperand(0), Op0->getOperand(1));
 }
 
+static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
+                                    const X86Subtarget &Subtarget) {
+  MVT VT = N->getSimpleValueType(0);
+  SDLoc DL(N);
+
+  if (N->getOperand(0) == N->getOperand(1)) {
+    if (N->getOpcode() == X86ISD::PCMPEQ)
+      return getOnesVector(VT, Subtarget, DAG, DL);
+    if (N->getOpcode() == X86ISD::PCMPGT)
+      return getZeroVector(VT, Subtarget, DAG, DL);
+  }
+
+  return SDValue();
+}
+
+
 SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -30538,6 +30554,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::MSCATTER:       return combineGatherScatter(N, DAG);
   case X86ISD::LSUB:        return combineLockSub(N, DAG, Subtarget);
   case X86ISD::TESTM:       return combineTestM(N, DAG);
+  case X86ISD::PCMPEQ:
+  case X86ISD::PCMPGT:      return combineVectorCompare(N, DAG, Subtarget);
   }
 
   return SDValue();
diff --git a/test/CodeGen/X86/vector-compare-combines.ll b/test/CodeGen/X86/vector-compare-combines.ll
index 11272dbdead..c25474d92f9 100644
--- a/test/CodeGen/X86/vector-compare-combines.ll
+++ b/test/CodeGen/X86/vector-compare-combines.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE42
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
 
-; FIXME: If we have SSE/AVX intrinsics in the code, we miss obvious combines
+; If we have SSE/AVX intrinsics in the code, we miss obvious combines
 ; unless we do them late on X86-specific nodes.
 
 declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>)
@@ -10,13 +10,11 @@ declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>)
 define <4 x i32> @PR27924_cmpeq(<4 x i32> %a, <4 x i32> %b) {
 ; SSE-LABEL: PR27924_cmpeq:
 ; SSE:       # BB#0:
-; SSE-NEXT:    pmaxsd %xmm1, %xmm0
 ; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: PR27924_cmpeq:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
@@ -31,14 +29,12 @@ define <4 x i32> @PR27924_cmpeq(<4 x i32> %a, <4 x i32> %b) {
 define <4 x i32> @PR27924_cmpgt(<4 x i32> %a, <4 x i32> %b) {
 ; SSE-LABEL: PR27924_cmpgt:
 ; SSE:       # BB#0:
-; SSE-NEXT:    pmaxsd %xmm1, %xmm0
-; SSE-NEXT:    pcmpgtd %xmm0, %xmm0
+; SSE-NEXT:    xorps %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: PR27924_cmpgt:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpcmpgtd %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
 %cmp = icmp sgt <4 x i32> %a, %b
-- 
2.50.1
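
Note on why the fold is safe: for integer vector compares, every lane is
equal to itself and no lane is signed-greater-than itself, so
PCMPEQ(x, x) is always an all-ones vector and PCMPGT(x, x) is always an
all-zeros vector. The standalone C++ sketch below (not part of the
patch; the file name and compiler invocation are only suggestions)
exercises the same identities through the SSE2 intrinsics that these
DAG nodes correspond to:

// pcmp_demo.cpp (hypothetical name): demonstrates the lane-wise
// identities behind combineVectorCompare using SSE2 intrinsics.
// Build on an x86-64 host with e.g. `g++ -O2 pcmp_demo.cpp`.
#include <immintrin.h>
#include <cstdint>
#include <cstdio>

int main() {
  __m128i x = _mm_set_epi32(4, 3, 2, 1);

  // pcmpeqd x, x: every lane equals itself, so each lane is 0xffffffff.
  __m128i eq = _mm_cmpeq_epi32(x, x);

  // pcmpgtd x, x: no lane is signed-greater-than itself, so each lane is 0.
  __m128i gt = _mm_cmpgt_epi32(x, x);

  alignas(16) uint32_t eq_lanes[4], gt_lanes[4];
  _mm_store_si128(reinterpret_cast<__m128i *>(eq_lanes), eq);
  _mm_store_si128(reinterpret_cast<__m128i *>(gt_lanes), gt);

  for (int i = 0; i < 4; ++i)
    std::printf("lane %d: eq=0x%08x gt=0x%08x\n", i, eq_lanes[i], gt_lanes[i]);

  // Every lane prints eq=0xffffffff and gt=0x00000000, which are exactly
  // the constants getOnesVector/getZeroVector materialize. Once the compare
  // is a constant, the pmaxsd feeding it becomes dead and is dropped too.
  return 0;
}

The combine is deliberately keyed on the integer nodes X86ISD::PCMPEQ
and X86ISD::PCMPGT; the analogous fold on floating-point compare nodes
would be unsafe because a NaN lane does not compare equal to itself.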