From 72a3a14d8bb47c2cbd531eb544335e725d2e86cc Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sat, 13 May 2017 22:10:58 +0000
Subject: [PATCH] [SelectionDAG] Added support for EXTRACT_SUBVECTOR/CONCAT_VECTORS demandedelts in ComputeNumSignBits

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@302997 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 36 ++++++++++++++++++-----
 test/CodeGen/X86/known-signbits-vector.ll | 27 +++--------------
 2 files changed, 33 insertions(+), 30 deletions(-)

diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c5e5193421b..057badcd6b7 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3171,14 +3171,36 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     return ComputeNumSignBits(InVec, DemandedSrcElts, Depth + 1);
   }
-  case ISD::EXTRACT_SUBVECTOR:
-    return ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+  case ISD::EXTRACT_SUBVECTOR: {
+    // If we know the element index, just demand that subvector elements,
+    // otherwise demand them all.
+    SDValue Src = Op.getOperand(0);
+    ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+    if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
+      // Offset the demanded elts by the subvector index.
+      uint64_t Idx = SubIdx->getZExtValue();
+      APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx);
+      return ComputeNumSignBits(Src, DemandedSrc, Depth + 1);
+    }
+    return ComputeNumSignBits(Src, Depth + 1);
+  }
   case ISD::CONCAT_VECTORS:
-    // Determine the minimum number of sign bits across all input vectors.
-    // Early out if the result is already 1.
-    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
-    for (unsigned i = 1, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i)
-      Tmp = std::min(Tmp, ComputeNumSignBits(Op.getOperand(i), Depth + 1));
+    // Determine the minimum number of sign bits across all demanded
+    // elts of the input vectors. Early out if the result is already 1.
+    Tmp = UINT_MAX;
+    EVT SubVectorVT = Op.getOperand(0).getValueType();
+    unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements();
+    unsigned NumSubVectors = Op.getNumOperands();
+    for (unsigned i = 0; (i < NumSubVectors) && (Tmp > 1); ++i) {
+      APInt DemandedSub = DemandedElts.lshr(i * NumSubVectorElts);
+      DemandedSub = DemandedSub.trunc(NumSubVectorElts);
+      if (!DemandedSub)
+        continue;
+      Tmp2 = ComputeNumSignBits(Op.getOperand(i), DemandedSub, Depth + 1);
+      Tmp = std::min(Tmp, Tmp2);
+    }
+    assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
     return Tmp;
   }
diff --git a/test/CodeGen/X86/known-signbits-vector.ll b/test/CodeGen/X86/known-signbits-vector.ll
index c8359097f65..d526b5dd786 100644
--- a/test/CodeGen/X86/known-signbits-vector.ll
+++ b/test/CodeGen/X86/known-signbits-vector.ll
@@ -173,10 +173,6 @@ define <4 x double> @signbits_sext_shuffle_sitofp(<4 x i32> %a0, <4 x i64> %a1)
 define <2 x double> @signbits_ashr_concat_ashr_extract_sitofp(<2 x i64> %a0, <4 x i64> %a1) nounwind {
 ; X32-LABEL: signbits_ashr_concat_ashr_extract_sitofp:
 ; X32:       # BB#0:
-; X32-NEXT:    pushl %ebp
-; X32-NEXT:    movl %esp, %ebp
-; X32-NEXT:    andl $-8, %esp
-; X32-NEXT:    subl $32, %esp
 ; X32-NEXT:    vpsrad $16, %xmm0, %xmm1
 ; X32-NEXT:    vpsrlq $16, %xmm0, %xmm0
 ; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
@@ -187,18 +183,8 @@ define <2 x double> @signbits_ashr_concat_ashr_extract_sitofp(<2 x i64> %a0, <4
 ; X32-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
 ; X32-NEXT:    vpxor %xmm2, %xmm0, %xmm0
 ; X32-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
-; X32-NEXT:    vmovq {{.*#+}} xmm1 = xmm0[0],zero
-; X32-NEXT:    vmovq %xmm1, {{[0-9]+}}(%esp)
-; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X32-NEXT:    vmovq %xmm0, {{[0-9]+}}(%esp)
-; X32-NEXT:    fildll {{[0-9]+}}(%esp)
-; X32-NEXT:    fstpl {{[0-9]+}}(%esp)
-; X32-NEXT:    fildll {{[0-9]+}}(%esp)
-; X32-NEXT:    fstpl (%esp)
-; X32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
-; X32-NEXT:    movl %ebp, %esp
-; X32-NEXT:    popl %ebp
+; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X32-NEXT:    vcvtdq2pd %xmm0, %xmm0
 ; X32-NEXT:    vzeroupper
 ; X32-NEXT:    retl
 ;
@@ -207,14 +193,9 @@ define <2 x double> @signbits_ashr_concat_ashr_extract_sitofp(<2 x i64> %a0, <4
 ; X64-NEXT:    vpsrad $16, %xmm0, %xmm1
 ; X64-NEXT:    vpsrlq $16, %xmm0, %xmm0
 ; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; X64-NEXT:    vpsrad $16, %xmm0, %xmm1
 ; X64-NEXT:    vpsrlq $16, %xmm0, %xmm0
-; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; X64-NEXT:    vpextrq $1, %xmm0, %rax
-; X64-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm1
-; X64-NEXT:    vmovq %xmm0, %rax
-; X64-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm0
-; X64-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-NEXT:    vcvtdq2pd %xmm0, %xmm0
 ; X64-NEXT:    retq
   %1 = ashr <2 x i64> %a0, <i64 16, i64 16>
   %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32>
-- 
2.40.0
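
Note (not part of the patch above): the following is a minimal, standalone C++ sketch of the demanded-elements bookkeeping that the new CONCAT_VECTORS path performs, with LLVM's APInt replaced by a plain uint64_t bitmask. Every name in it (splitDemandedElts and its parameters) is hypothetical and exists only to illustrate the lshr/trunc splitting used in the hunk above.

#include <cassert>
#include <cstdint>
#include <vector>

// Split a demanded-elements bitmask for a concatenation of NumSubVectors
// operands, each holding NumSubVectorElts elements, into one mask per
// operand: shift the concatenated mask down past the preceding operands,
// then keep only the low NumSubVectorElts bits.
std::vector<uint64_t> splitDemandedElts(uint64_t DemandedElts,
                                        unsigned NumSubVectors,
                                        unsigned NumSubVectorElts) {
  assert(NumSubVectors * NumSubVectorElts <= 64 && "mask must fit in 64 bits");
  const uint64_t SubMask = (NumSubVectorElts == 64)
                               ? ~0ULL
                               : ((1ULL << NumSubVectorElts) - 1);
  std::vector<uint64_t> Result;
  for (unsigned i = 0; i != NumSubVectors; ++i)
    Result.push_back((DemandedElts >> (i * NumSubVectorElts)) & SubMask);
  return Result;
}

int main() {
  // Concatenation of two <2 x i64> operands into <4 x i64>, with only the
  // low two result elements demanded (mask 0b0011): operand 0 keeps both of
  // its elements demanded, operand 1 contributes nothing.
  std::vector<uint64_t> Masks =
      splitDemandedElts(/*DemandedElts=*/0b0011, /*NumSubVectors=*/2,
                        /*NumSubVectorElts=*/2);
  assert(Masks[0] == 0b11 && Masks[1] == 0);
  return 0;
}

An all-zero per-operand mask is exactly the case the patch skips with "if (!DemandedSub) continue;", which is what lets ComputeNumSignBits ignore concatenated operands whose elements are never extracted.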