From 897e7ff8d5200d2c3e95c0bb5f83b23e594b200b Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 3 Jul 2019 17:06:59 +0000
Subject: [PATCH] [X86] ComputeNumSignBitsForTargetNode - add target shuffle
 support.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@365057 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp     | 51 +++++++++++++++++++++++++-
 test/CodeGen/X86/vector-reduce-smax.ll | 16 --------
 test/CodeGen/X86/vector-reduce-smin.ll | 16 --------
 3 files changed, 50 insertions(+), 33 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index fc264a74975..efe92543f3a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -31271,7 +31271,8 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
 unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
     unsigned Depth) const {
-  unsigned VTBits = Op.getScalarValueSizeInBits();
+  EVT VT = Op.getValueType();
+  unsigned VTBits = VT.getScalarSizeInBits();
   unsigned Opcode = Op.getOpcode();
   switch (Opcode) {
   case X86ISD::SETCC_CARRY:
@@ -31353,6 +31354,54 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
   }
   }
 
+  // Handle target shuffles.
+  // TODO - use resolveTargetShuffleInputs once we can limit recursive depth.
+  if (isTargetShuffle(Opcode)) {
+    bool IsUnary;
+    SmallVector<int, 64> Mask;
+    SmallVector<SDValue, 2> Ops;
+    if (getTargetShuffleMask(Op.getNode(), VT.getSimpleVT(), true, Ops, Mask,
+                             IsUnary)) {
+      unsigned NumOps = Ops.size();
+      unsigned NumElts = VT.getVectorNumElements();
+      if (Mask.size() == NumElts) {
+        SmallVector<APInt, 2> DemandedOps(NumOps, APInt(NumElts, 0));
+        for (unsigned i = 0; i != NumElts; ++i) {
+          if (!DemandedElts[i])
+            continue;
+          int M = Mask[i];
+          if (M == SM_SentinelUndef) {
+            // For UNDEF elements, we don't know anything about the common state
+            // of the shuffle result.
+            return 1;
+          } else if (M == SM_SentinelZero) {
+            // Zero = all sign bits.
+            continue;
+          }
+          assert(0 <= M && (unsigned)M < (NumOps * NumElts) &&
+                 "Shuffle index out of range");
+
+          unsigned OpIdx = (unsigned)M / NumElts;
+          unsigned EltIdx = (unsigned)M % NumElts;
+          if (Ops[OpIdx].getValueType() != VT) {
+            // TODO - handle target shuffle ops with different value types.
+            return 1;
+          }
+          DemandedOps[OpIdx].setBit(EltIdx);
+        }
+        unsigned Tmp0 = VTBits;
+        for (unsigned i = 0; i != NumOps && Tmp0 > 1; ++i) {
+          if (!DemandedOps[i])
+            continue;
+          unsigned Tmp1 =
+              DAG.ComputeNumSignBits(Ops[i], DemandedOps[i], Depth + 1);
+          Tmp0 = std::min(Tmp0, Tmp1);
+        }
+        return Tmp0;
+      }
+    }
+  }
+
   // Fallback case.
   return 1;
 }
diff --git a/test/CodeGen/X86/vector-reduce-smax.ll b/test/CodeGen/X86/vector-reduce-smax.ll
index 81049f66580..057174249ff 100644
--- a/test/CodeGen/X86/vector-reduce-smax.ll
+++ b/test/CodeGen/X86/vector-reduce-smax.ll
@@ -1277,8 +1277,6 @@ define i16 @test_v4i16(<4 x i16> %a0) {
 ; SSE2-NEXT:    pandn %xmm1, %xmm2
 ; SSE2-NEXT:    por %xmm0, %xmm2
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
-; SSE2-NEXT:    pslld $16, %xmm0
-; SSE2-NEXT:    psrad $16, %xmm0
 ; SSE2-NEXT:    movdqa %xmm2, %xmm1
 ; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
 ; SSE2-NEXT:    pand %xmm1, %xmm2
@@ -1297,8 +1295,6 @@ define i16 @test_v4i16(<4 x i16> %a0) {
 ; SSE41-NEXT:    psrad $16, %xmm1
 ; SSE41-NEXT:    pmaxsd %xmm0, %xmm1
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE41-NEXT:    pslld $16, %xmm0
-; SSE41-NEXT:    psrad $16, %xmm0
 ; SSE41-NEXT:    pmaxsd %xmm1, %xmm0
 ; SSE41-NEXT:    movd %xmm0, %eax
 ; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1313,8 +1309,6 @@ define i16 @test_v4i16(<4 x i16> %a0) {
 ; AVX-NEXT:    vpsrad $16, %xmm0, %xmm0
 ; AVX-NEXT:    vpmaxsd %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX-NEXT:    vpslld $16, %xmm1, %xmm1
-; AVX-NEXT:    vpsrad $16, %xmm1, %xmm1
 ; AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1329,8 +1323,6 @@ define i16 @test_v4i16(<4 x i16> %a0) {
 ; AVX512-NEXT:    vpsrad $16, %xmm0, %xmm0
 ; AVX512-NEXT:    vpmaxsd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpslld $16, %xmm1, %xmm1
-; AVX512-NEXT:    vpsrad $16, %xmm1, %xmm1
 ; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1764,8 +1756,6 @@ define i8 @test_v4i8(<4 x i8> %a0) {
 ; SSE2-NEXT:    pandn %xmm1, %xmm2
 ; SSE2-NEXT:    por %xmm0, %xmm2
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
-; SSE2-NEXT:    pslld $24, %xmm0
-; SSE2-NEXT:    psrad $24, %xmm0
 ; SSE2-NEXT:    movdqa %xmm2, %xmm1
 ; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
 ; SSE2-NEXT:    pand %xmm1, %xmm2
@@ -1784,8 +1774,6 @@ define i8 @test_v4i8(<4 x i8> %a0) {
 ; SSE41-NEXT:    psrad $24, %xmm1
 ; SSE41-NEXT:    pmaxsd %xmm0, %xmm1
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE41-NEXT:    pslld $24, %xmm0
-; SSE41-NEXT:    psrad $24, %xmm0
 ; SSE41-NEXT:    pmaxsd %xmm1, %xmm0
 ; SSE41-NEXT:    pextrb $0, %xmm0, %eax
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
@@ -1800,8 +1788,6 @@ define i8 @test_v4i8(<4 x i8> %a0) {
 ; AVX-NEXT:    vpsrad $24, %xmm0, %xmm0
 ; AVX-NEXT:    vpmaxsd %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX-NEXT:    vpslld $24, %xmm1, %xmm1
-; AVX-NEXT:    vpsrad $24, %xmm1, %xmm1
 ; AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
@@ -1816,8 +1802,6 @@ define i8 @test_v4i8(<4 x i8> %a0) {
 ; AVX512-NEXT:    vpsrad $24, %xmm0, %xmm0
 ; AVX512-NEXT:    vpmaxsd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpslld $24, %xmm1, %xmm1
-; AVX512-NEXT:    vpsrad $24, %xmm1, %xmm1
 ; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
diff --git a/test/CodeGen/X86/vector-reduce-smin.ll b/test/CodeGen/X86/vector-reduce-smin.ll
index 8cb716f879a..30692a65be8 100644
--- a/test/CodeGen/X86/vector-reduce-smin.ll
+++ b/test/CodeGen/X86/vector-reduce-smin.ll
@@ -1276,8 +1276,6 @@ define i16 @test_v4i16(<4 x i16> %a0) {
 ; SSE2-NEXT:    pandn %xmm1, %xmm2
 ; SSE2-NEXT:    por %xmm0, %xmm2
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
-; SSE2-NEXT:    pslld $16, %xmm0
-; SSE2-NEXT:    psrad $16, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
 ; SSE2-NEXT:    pand %xmm1, %xmm2
@@ -1296,8 +1294,6 @@ define i16 @test_v4i16(<4 x i16> %a0) {
 ; SSE41-NEXT:    psrad $16, %xmm1
 ; SSE41-NEXT:    pminsd %xmm0, %xmm1
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE41-NEXT:    pslld $16, %xmm0
-; SSE41-NEXT:    psrad $16, %xmm0
 ; SSE41-NEXT:    pminsd %xmm1, %xmm0
 ; SSE41-NEXT:    movd %xmm0, %eax
 ; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1312,8 +1308,6 @@ define i16 @test_v4i16(<4 x i16> %a0) {
 ; AVX-NEXT:    vpsrad $16, %xmm0, %xmm0
 ; AVX-NEXT:    vpminsd %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX-NEXT:    vpslld $16, %xmm1, %xmm1
-; AVX-NEXT:    vpsrad $16, %xmm1, %xmm1
 ; AVX-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1328,8 +1322,6 @@ define i16 @test_v4i16(<4 x i16> %a0) {
 ; AVX512-NEXT:    vpsrad $16, %xmm0, %xmm0
 ; AVX512-NEXT:    vpminsd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpslld $16, %xmm1, %xmm1
-; AVX512-NEXT:    vpsrad $16, %xmm1, %xmm1
 ; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1763,8 +1755,6 @@ define i8 @test_v4i8(<4 x i8> %a0) {
 ; SSE2-NEXT:    pandn %xmm1, %xmm2
 ; SSE2-NEXT:    por %xmm0, %xmm2
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
-; SSE2-NEXT:    pslld $24, %xmm0
-; SSE2-NEXT:    psrad $24, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
 ; SSE2-NEXT:    pand %xmm1, %xmm2
@@ -1783,8 +1773,6 @@ define i8 @test_v4i8(<4 x i8> %a0) {
 ; SSE41-NEXT:    psrad $24, %xmm1
 ; SSE41-NEXT:    pminsd %xmm0, %xmm1
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE41-NEXT:    pslld $24, %xmm0
-; SSE41-NEXT:    psrad $24, %xmm0
 ; SSE41-NEXT:    pminsd %xmm1, %xmm0
 ; SSE41-NEXT:    pextrb $0, %xmm0, %eax
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
@@ -1799,8 +1787,6 @@ define i8 @test_v4i8(<4 x i8> %a0) {
 ; AVX-NEXT:    vpsrad $24, %xmm0, %xmm0
 ; AVX-NEXT:    vpminsd %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX-NEXT:    vpslld $24, %xmm1, %xmm1
-; AVX-NEXT:    vpsrad $24, %xmm1, %xmm1
 ; AVX-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
@@ -1815,8 +1801,6 @@ define i8 @test_v4i8(<4 x i8> %a0) {
 ; AVX512-NEXT:    vpsrad $24, %xmm0, %xmm0
 ; AVX512-NEXT:    vpminsd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpslld $24, %xmm1, %xmm1
-; AVX512-NEXT:    vpsrad $24, %xmm1, %xmm1
 ; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
-- 
2.50.1
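
As a rough, self-contained model of what the new target-shuffle handling computes (illustrative code only, not part of the patch: the names computeShuffleSignBits, kUndef and kZero are invented, and the patch's per-element DemandedOps tracking is collapsed into a single sign-bit count per operand), the per-element logic reduces to:

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <vector>

// Illustrative sentinels mirroring SM_SentinelUndef / SM_SentinelZero.
constexpr int kUndef = -1;
constexpr int kZero = -2;

// Each source operand is summarised by the minimum number of sign bits common
// to all of its elements; VTBits is the element width, NumElts the number of
// elements per operand. Returns the sign-bit count of the shuffled result.
unsigned computeShuffleSignBits(const std::vector<int> &Mask,
                                const std::vector<unsigned> &OpSignBits,
                                unsigned NumElts, unsigned VTBits) {
  unsigned Result = VTBits;
  for (int M : Mask) {
    if (M == kUndef)
      return 1;                       // Undef element: assume nothing.
    if (M == kZero)
      continue;                       // Zero element: all bits are sign bits.
    unsigned OpIdx = (unsigned)M / NumElts;
    assert(OpIdx < OpSignBits.size() && "Shuffle index out of range");
    Result = std::min(Result, OpSignBits[OpIdx]);
  }
  return Result;
}

int main() {
  // v4i32 shuffle of two inputs whose elements were sign-extended from i16,
  // so each input has at least 17 sign bits; one result lane is known zero.
  std::vector<int> Mask = {0, kZero, 5, 6};   // indices 4..7 pick operand 1
  std::vector<unsigned> OpSignBits = {17, 17};
  std::printf("%u\n", computeShuffleSignBits(Mask, OpSignBits, /*NumElts=*/4,
                                             /*VTBits=*/32)); // prints 17
  return 0;
}

Under this model the test changes above follow directly: the pshufd results feeding the final pmaxsd/pminsd already carry at least 17 (i16 tests) or 25 (i8 tests) sign bits, so the pslld/psrad re-extension pairs are redundant and get removed.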