From: Simon Pilgrim
Date: Tue, 25 Jun 2019 12:19:12 +0000 (+0000)
Subject: [TargetLowering] SimplifyDemandedBits SIGN_EXTEND_VECTOR_INREG -> ANY/ZERO_EXTEND_VEC...
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=07e7622e7c13fb444ecf44e2c4fc35bf2613bac2;p=llvm

[TargetLowering] SimplifyDemandedBits SIGN_EXTEND_VECTOR_INREG -> ANY/ZERO_EXTEND_VECTOR_INREG

Simplify SIGN_EXTEND_VECTOR_INREG to ANY_EXTEND_VECTOR_INREG if none of the extended bits are demanded, or to ZERO_EXTEND_VECTOR_INREG if the sign bit is known zero. Matches what we already do for SIGN_EXTEND.

Reapplies rL363802, but now with the legality checks added at rL364290.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364299 91177308-0d34-0410-b5e6-96231b3b80d8
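A minimal standalone sketch (plain C++, not LLVM code; the values and
comments are illustrative only) of why the two folds are sound, using a
32->64-bit scalar extension to stand in for one vector lane:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Fold 1: sign-extend -> any-extend when no demanded bit lies above
      // the source width. Under that mask the copied sign bits are never
      // observed, so ANY_EXTEND's unspecified high bits are equivalent.
      int32_t Src = INT32_MIN;                       // sign bit set
      uint64_t DemandedBits = 0x00000000FFFFFFFFull; // low 32 bits only
      uint64_t Sext = (uint64_t)(int64_t)Src;        // high bits all ones
      uint64_t Any = (uint64_t)(uint32_t)Src;        // one legal any-extend
      assert((Sext & DemandedBits) == (Any & DemandedBits));

      // Fold 2: sign-extend -> zero-extend when the sign bit is known zero
      // (Known.isNonNegative()); the extensions then agree on every bit.
      int32_t NonNeg = INT32_MAX;                    // sign bit clear
      assert((uint64_t)(int64_t)NonNeg == (uint64_t)(uint32_t)NonNeg);
      return 0;
    }

In the patch below, the rewrite only fires when legal operations are not
yet required or the target reports the replacement opcode legal, which is
the legality check carried over from rL364290.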
---

diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 2608d06ff90..b20c3a937fe 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1416,9 +1416,9 @@ bool TargetLowering::SimplifyDemandedBits(
     bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
 
     // If none of the top bits are demanded, convert this into an any_extend.
-    // TODO: Add SIGN_EXTEND_VECTOR_INREG - ANY_EXTEND_VECTOR_INREG fold.
-    if (DemandedBits.getActiveBits() <= InBits && !IsVecInReg) {
-      unsigned Opc = ISD::ANY_EXTEND;
+    if (DemandedBits.getActiveBits() <= InBits) {
+      unsigned Opc =
+          IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
     }
@@ -1440,9 +1440,9 @@ bool TargetLowering::SimplifyDemandedBits(
     Known = Known.sext(BitWidth);
 
     // If the sign bit is known zero, convert this to a zero extend.
-    // TODO: Add SIGN_EXTEND_VECTOR_INREG - ZERO_EXTEND_VECTOR_INREG fold.
-    if (Known.isNonNegative() && !IsVecInReg) {
-      unsigned Opc = ISD::ZERO_EXTEND;
+    if (Known.isNonNegative()) {
+      unsigned Opc =
+          IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
     }
diff --git a/test/CodeGen/X86/pmul.ll b/test/CodeGen/X86/pmul.ll
index 2768bf0dc9d..5d28f916f05 100644
--- a/test/CodeGen/X86/pmul.ll
+++ b/test/CodeGen/X86/pmul.ll
@@ -1326,15 +1326,13 @@ define <8 x i64> @mul_v8i64_sext(<8 x i16> %val1, <8 x i32> %val2) {
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
 ; SSE41-NEXT:    pmovsxwq %xmm3, %xmm6
 ; SSE41-NEXT:    pmovsxwq %xmm0, %xmm7
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
-; SSE41-NEXT:    pmovsxdq %xmm0, %xmm3
+; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[2,2,3,3]
 ; SSE41-NEXT:    pmuldq %xmm4, %xmm3
-; SSE41-NEXT:    pmovsxdq %xmm2, %xmm2
+; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
 ; SSE41-NEXT:    pmuldq %xmm5, %xmm2
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE41-NEXT:    pmovsxdq %xmm0, %xmm4
+; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[2,2,3,3]
 ; SSE41-NEXT:    pmuldq %xmm6, %xmm4
-; SSE41-NEXT:    pmovsxdq %xmm1, %xmm0
+; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
 ; SSE41-NEXT:    pmuldq %xmm7, %xmm0
 ; SSE41-NEXT:    movdqa %xmm4, %xmm1
 ; SSE41-NEXT:    retq
@@ -1345,9 +1343,9 @@ define <8 x i64> @mul_v8i64_sext(<8 x i16> %val1, <8 x i32> %val2) {
 ; AVX2-NEXT:    vpmovsxwq %xmm2, %ymm2
 ; AVX2-NEXT:    vpmovsxwq %xmm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
-; AVX2-NEXT:    vpmovsxdq %xmm3, %ymm3
+; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
 ; AVX2-NEXT:    vpmuldq %ymm3, %ymm2, %ymm2
-; AVX2-NEXT:    vpmovsxdq %xmm1, %ymm1
+; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
 ; AVX2-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vmovdqa %ymm2, %ymm1
 ; AVX2-NEXT:    retq
diff --git a/test/CodeGen/X86/vselect.ll b/test/CodeGen/X86/vselect.ll
index 073c15c96d2..0d43a50330f 100644
--- a/test/CodeGen/X86/vselect.ll
+++ b/test/CodeGen/X86/vselect.ll
@@ -672,14 +672,23 @@ define i64 @vselect_any_extend_vector_inreg_crash(<8 x i8>* %x) {
 ; SSE41-NEXT:    movq %xmm2, %rax
 ; SSE41-NEXT:    retq
 ;
-; AVX-LABEL: vselect_any_extend_vector_inreg_crash:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
-; AVX-NEXT:    vpcmpeqw {{.*}}(%rip), %xmm0, %xmm0
-; AVX-NEXT:    vpmovsxwq %xmm0, %xmm0
-; AVX-NEXT:    vmovq %xmm0, %rax
-; AVX-NEXT:    andl $32768, %eax # imm = 0x8000
-; AVX-NEXT:    retq
+; AVX1-LABEL: vselect_any_extend_vector_inreg_crash:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; AVX1-NEXT:    vpcmpeqw {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vmovq %xmm0, %rax
+; AVX1-NEXT:    andl $32768, %eax # imm = 0x8000
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: vselect_any_extend_vector_inreg_crash:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; AVX2-NEXT:    vpcmpeqw {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX2-NEXT:    vmovq %xmm0, %rax
+; AVX2-NEXT:    andl $32768, %eax # imm = 0x8000
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
 0:
   %1 = load <8 x i8>, <8 x i8>* %x
   %2 = icmp eq <8 x i8> %1,