From: Craig Topper Date: Fri, 4 Jan 2019 20:50:59 +0000 (+0000) Subject: [X86] Add INSERT_SUBVECTOR to ComputeNumSignBits X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d096eb7cf0948df6ff9ef1bb6d3f2fcebaf8c762;p=llvm [X86] Add INSERT_SUBVECTOR to ComputeNumSignBits This adds support for calculating sign bits of insert_subvector. I based it on the computeKnownBits. My motivating case is propagating sign bits information across basic blocks on AVX targets where concatenating using insert_subvector is common. Differential Revision: https://reviews.llvm.org/D56283 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350432 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 519f4d82f6a..3cff4c5c0fc 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3693,7 +3693,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, } return ComputeNumSignBits(Src, Depth + 1); } - case ISD::CONCAT_VECTORS: + case ISD::CONCAT_VECTORS: { // Determine the minimum number of sign bits across all demanded // elts of the input vectors. Early out if the result is already 1. Tmp = std::numeric_limits<unsigned>::max(); @@ -3711,6 +3711,40 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); return Tmp; } + case ISD::INSERT_SUBVECTOR: { + // If we know the element index, demand any elements from the subvector and + // the remainder from the src its inserted into, otherwise demand them all. 
+ SDValue Src = Op.getOperand(0); + SDValue Sub = Op.getOperand(1); + auto *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); + unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); + if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) { + Tmp = std::numeric_limits<unsigned>::max(); + uint64_t Idx = SubIdx->getZExtValue(); + APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); + if (!!DemandedSubElts) { + Tmp = ComputeNumSignBits(Sub, DemandedSubElts, Depth + 1); + if (Tmp == 1) return 1; // early-out + } + APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts); + APInt DemandedSrcElts = DemandedElts & ~SubMask; + if (!!DemandedSrcElts) { + Tmp2 = ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1); + Tmp = std::min(Tmp, Tmp2); + } + assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); + return Tmp; + } + + // Not able to determine the index so just assume worst case. + Tmp = ComputeNumSignBits(Sub, Depth + 1); + if (Tmp == 1) return 1; // early-out + Tmp2 = ComputeNumSignBits(Src, Depth + 1); + Tmp = std::min(Tmp, Tmp2); + assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); + return Tmp; + } + } // If we are looking at the loaded value of the SDNode. 
if (Op.getResNo() == 0) { diff --git a/test/CodeGen/X86/known-signbits-vector.ll b/test/CodeGen/X86/known-signbits-vector.ll index 02362bde81a..3defc21dd2f 100644 --- a/test/CodeGen/X86/known-signbits-vector.ll +++ b/test/CodeGen/X86/known-signbits-vector.ll @@ -394,20 +394,11 @@ define void @cross_bb_signbits_insert_subvec(<32 x i8>* %ptr, <32 x i8> %x, <32 ; X32-LABEL: cross_bb_signbits_insert_subvec: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: vextractf128 $1, %ymm0, %xmm3 -; X32-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; X32-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3 -; X32-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0 -; X32-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 -; X32-NEXT: vextractf128 $1, %ymm0, %xmm3 -; X32-NEXT: vpsllw $7, %xmm3, %xmm3 -; X32-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; X32-NEXT: vpand %xmm4, %xmm3, %xmm3 -; X32-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm3 -; X32-NEXT: vpsllw $7, %xmm0, %xmm0 -; X32-NEXT: vpand %xmm4, %xmm0, %xmm0 -; X32-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0 -; X32-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 +; X32-NEXT: vextractf128 $1, %ymm0, %xmm2 +; X32-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; X32-NEXT: vpcmpeqb %xmm3, %xmm2, %xmm2 +; X32-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 +; X32-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; X32-NEXT: vandnps %ymm1, %ymm0, %ymm1 ; X32-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0 ; X32-NEXT: vorps %ymm1, %ymm0, %ymm0 @@ -417,20 +408,11 @@ define void @cross_bb_signbits_insert_subvec(<32 x i8>* %ptr, <32 x i8> %x, <32 ; ; X64-LABEL: cross_bb_signbits_insert_subvec: ; X64: # %bb.0: -; X64-NEXT: vextractf128 $1, %ymm0, %xmm3 -; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; X64-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3 -; X64-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0 -; X64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 -; X64-NEXT: vextractf128 $1, %ymm0, %xmm3 -; X64-NEXT: vpsllw $7, %xmm3, %xmm3 -; X64-NEXT: vmovdqa {{.*#+}} xmm4 = 
[128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; X64-NEXT: vpand %xmm4, %xmm3, %xmm3 -; X64-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm3 -; X64-NEXT: vpsllw $7, %xmm0, %xmm0 -; X64-NEXT: vpand %xmm4, %xmm0, %xmm0 -; X64-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0 -; X64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 +; X64-NEXT: vextractf128 $1, %ymm0, %xmm2 +; X64-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; X64-NEXT: vpcmpeqb %xmm3, %xmm2, %xmm2 +; X64-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 +; X64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; X64-NEXT: vandnps %ymm1, %ymm0, %ymm1 ; X64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 ; X64-NEXT: vorps %ymm1, %ymm0, %ymm0