From: Craig Topper
Date: Mon, 4 Dec 2017 05:38:42 +0000 (+0000)
Subject: [SelectionDAG] Teach computeKnownBits some improvements to ISD::SRL with a non-splat...
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7cd55ac3b8f23fe22e74a72efa40dcab0678fd17;p=llvm

[SelectionDAG] Teach computeKnownBits some improvements to ISD::SRL with a
non-splat constant shift amount.

If we have a non-splat constant shift amount, the minimum shift amount can be
used to infer the number of zero upper bits of the result. There's probably a
lot more that we can do here, but this fixes a case where I wanted to infer
the sign bit as zero when all the shift amounts are non-zero.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319639 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 0b59af2fa10..bcc972e0b4f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2478,6 +2478,25 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
       Known.One.lshrInPlace(Shift);
       // High bits are known zero.
       Known.Zero.setHighBits(Shift);
+    } else if (auto *BV = dyn_cast<BuildVectorSDNode>(Op.getOperand(1))) {
+      // If the shift amount is a vector of constants see if we can bound
+      // the number of upper zero bits.
+      unsigned ShiftAmountMin = BitWidth;
+      for (unsigned i = 0; i != BV->getNumOperands(); ++i) {
+        if (auto *C = dyn_cast<ConstantSDNode>(BV->getOperand(i))) {
+          const APInt &ShAmt = C->getAPIntValue();
+          if (ShAmt.ult(BitWidth)) {
+            ShiftAmountMin = std::min<unsigned>(ShiftAmountMin,
+                                                ShAmt.getZExtValue());
+            continue;
+          }
+        }
+        // Don't know anything.
+        ShiftAmountMin = 0;
+        break;
+      }
+
+      Known.Zero.setHighBits(ShiftAmountMin);
     }
     break;
   case ISD::SRA:
diff --git a/test/CodeGen/X86/combine-srl.ll b/test/CodeGen/X86/combine-srl.ll
index c5f03dbd5a3..9be85422731 100644
--- a/test/CodeGen/X86/combine-srl.ll
+++ b/test/CodeGen/X86/combine-srl.ll
@@ -257,13 +257,13 @@ define <4 x i32> @combine_vec_lshr_trunc_lshr_zero1(<4 x i64> %x) {
 ; SSE-NEXT: psrlq $49, %xmm2
 ; SSE-NEXT: psrlq $48, %xmm0
 ; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
-; SSE-NEXT: movaps %xmm0, %xmm1
+; SSE-NEXT: packusdw %xmm1, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm1
 ; SSE-NEXT: psrld $27, %xmm1
-; SSE-NEXT: movaps %xmm0, %xmm2
+; SSE-NEXT: movdqa %xmm0, %xmm2
 ; SSE-NEXT: psrld $25, %xmm2
 ; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
-; SSE-NEXT: movaps %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm0, %xmm1
 ; SSE-NEXT: psrld $26, %xmm1
 ; SSE-NEXT: psrld $24, %xmm0
 ; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
@@ -273,8 +273,8 @@ define <4 x i32> @combine_vec_lshr_trunc_lshr_zero1(<4 x i64> %x) {
 ; AVX-LABEL: combine_vec_lshr_trunc_lshr_zero1:
 ; AVX:       # BB#0:
 ; AVX-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
-; AVX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
-; AVX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: vzeroupper
 ; AVX-NEXT: retq
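
Note for readers outside the SelectionDAG code: the rule being added can be
illustrated without LLVM's APInt/KnownBits machinery. Below is a minimal
standalone C++ sketch; the helper name knownZeroHighBitsForSRL and the sample
shift amounts are purely illustrative and not part of LLVM's API. It mirrors
the ShiftAmountMin loop in the hunk above: as long as every constant shift
amount is in range, the smallest one bounds how many high bits of the SRL
result must be zero, and any non-zero minimum already proves the sign bit is
zero, which is the case the message mentions.

    // known_srl_bits.cpp - standalone illustration of the rule added above.
    // Build: c++ -std=c++11 known_srl_bits.cpp -o known_srl_bits
    #include <algorithm>
    #include <iostream>
    #include <vector>

    // Given the per-element constant shift amounts of a vector logical shift
    // right, return how many high bits of every result element are known to
    // be zero. An out-of-range amount means we know nothing (0 bits), just
    // like the "Don't know anything" path in the diff.
    static unsigned knownZeroHighBitsForSRL(const std::vector<unsigned> &ShiftAmts,
                                            unsigned BitWidth) {
      unsigned ShiftAmountMin = BitWidth;
      for (unsigned Amt : ShiftAmts) {
        if (Amt < BitWidth) {
          ShiftAmountMin = std::min(ShiftAmountMin, Amt);
          continue;
        }
        return 0; // Shift amount out of range: no bits are known.
      }
      return ShiftAmountMin;
    }

    int main() {
      // Non-splat amounts like the first shift in the test case above:
      // <4 x i64> shifted right by <48, 49, 50, 51>.
      std::vector<unsigned> Amts = {48, 49, 50, 51};
      unsigned BitWidth = 64;

      unsigned HighZeros = knownZeroHighBitsForSRL(Amts, BitWidth);
      std::cout << "high bits known zero: " << HighZeros << "\n";
      // Every amount is >= 1, so the sign bit of each lane is known zero.
      std::cout << "sign bit known zero: " << (HighZeros >= 1 ? "yes" : "no")
                << "\n";
      return 0;
    }

For the amounts above the minimum is 48, so each 64-bit lane has 48 known-zero
high bits; a splat-only analysis would have reported nothing for this shift.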