From d48da2b6432c4a5616f44ca12170f2cfbfdeafac Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 19 Jun 2019 13:58:02 +0000
Subject: [PATCH] [TargetLowering] SimplifyDemandedBits
 SIGN_EXTEND_VECTOR_INREG -> ANY/ZERO_EXTEND_VECTOR_INREG

Simplify SIGN_EXTEND_VECTOR_INREG to ANY_EXTEND_VECTOR_INREG if the
extended bits are not required, or to ZERO_EXTEND_VECTOR_INREG if the
sign bit is known zero.

Matches what we already do for SIGN_EXTEND.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363802 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/TargetLowering.cpp | 16 ++++++++++------
 test/CodeGen/X86/pmul.ll                    | 10 ++++------
 test/CodeGen/X86/xop-ifma.ll                | 10 ++++------
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index dcd8478e48b..9d457427528 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1413,9 +1413,11 @@ bool TargetLowering::SimplifyDemandedBits(
     bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
 
     // If none of the top bits are demanded, convert this into an any_extend.
-    // TODO: Add SIGN_EXTEND_VECTOR_INREG - ANY_EXTEND_VECTOR_INREG fold.
-    if (DemandedBits.getActiveBits() <= InBits && !IsVecInReg)
-      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Src));
+    if (DemandedBits.getActiveBits() <= InBits)
+      return TLO.CombineTo(
+          Op, TLO.DAG.getNode(IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG
+                                         : ISD::ANY_EXTEND,
+                              dl, VT, Src));
 
     APInt InDemandedBits = DemandedBits.trunc(InBits);
     APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
@@ -1434,9 +1436,11 @@ bool TargetLowering::SimplifyDemandedBits(
     Known = Known.sext(BitWidth);
 
     // If the sign bit is known zero, convert this to a zero extend.
-    // TODO: Add SIGN_EXTEND_VECTOR_INREG - ZERO_EXTEND_VECTOR_INREG fold.
-    if (Known.isNonNegative() && !IsVecInReg)
-      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Src));
+    if (Known.isNonNegative())
+      return TLO.CombineTo(
+          Op, TLO.DAG.getNode(IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG
+                                         : ISD::ZERO_EXTEND,
+                              dl, VT, Src));
     break;
   }
   case ISD::ANY_EXTEND: {
diff --git a/test/CodeGen/X86/pmul.ll b/test/CodeGen/X86/pmul.ll
index 1960e790925..5d28f916f05 100644
--- a/test/CodeGen/X86/pmul.ll
+++ b/test/CodeGen/X86/pmul.ll
@@ -1326,15 +1326,13 @@ define <8 x i64> @mul_v8i64_sext(<8 x i16> %val1, <8 x i32> %val2) {
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
 ; SSE41-NEXT:    pmovsxwq %xmm3, %xmm6
 ; SSE41-NEXT:    pmovsxwq %xmm0, %xmm7
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
-; SSE41-NEXT:    pmovsxdq %xmm0, %xmm3
+; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[2,2,3,3]
 ; SSE41-NEXT:    pmuldq %xmm4, %xmm3
-; SSE41-NEXT:    pmovsxdq %xmm2, %xmm2
+; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
 ; SSE41-NEXT:    pmuldq %xmm5, %xmm2
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE41-NEXT:    pmovsxdq %xmm0, %xmm4
+; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[2,2,3,3]
 ; SSE41-NEXT:    pmuldq %xmm6, %xmm4
-; SSE41-NEXT:    pmovsxdq %xmm1, %xmm0
+; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
 ; SSE41-NEXT:    pmuldq %xmm7, %xmm0
 ; SSE41-NEXT:    movdqa %xmm4, %xmm1
 ; SSE41-NEXT:    retq
diff --git a/test/CodeGen/X86/xop-ifma.ll b/test/CodeGen/X86/xop-ifma.ll
index b8ec25ec1ee..4712910e11b 100644
--- a/test/CodeGen/X86/xop-ifma.ll
+++ b/test/CodeGen/X86/xop-ifma.ll
@@ -67,12 +67,10 @@ define <8 x i32> @test_mul_v8i32_add_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i3
 define <4 x i64> @test_mulx_v4i32_add_v4i64(<4 x i32> %a0, <4 x i32> %a1, <4 x i64> %a2) {
 ; XOP-AVX1-LABEL: test_mulx_v4i32_add_v4i64:
 ; XOP-AVX1:       # %bb.0:
-; XOP-AVX1-NEXT:    vpmovsxdq %xmm0, %xmm3
-; XOP-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; XOP-AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
-; XOP-AVX1-NEXT:    vpmovsxdq %xmm1, %xmm4
-; XOP-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; XOP-AVX1-NEXT:    vpmovsxdq %xmm1, %xmm1
+; XOP-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero
+; XOP-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,3,3]
+; XOP-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm4 = xmm1[0],zero,xmm1[1],zero
+; XOP-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,1,3,3]
 ; XOP-AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
 ; XOP-AVX1-NEXT:    vpmacsdql %xmm5, %xmm1, %xmm0, %xmm0
 ; XOP-AVX1-NEXT:    vpmacsdql %xmm2, %xmm4, %xmm3, %xmm1
-- 
2.40.0
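
For context, here is a minimal standalone C++ sketch of the decision this
patch teaches SimplifyDemandedBits to make. The names (ExtendKind,
activeBits, simplifySignExtend) and the flat uint64_t demanded-bits mask
are illustrative simplifications, not LLVM's actual APInt/SDNode machinery.

#include <cstdint>
#include <iostream>

enum class ExtendKind { Sign, Zero, Any };

// Number of significant bits in Mask, mirroring what
// APInt::getActiveBits() computes for a 64-bit mask.
static unsigned activeBits(uint64_t Mask) {
  unsigned Bits = 0;
  for (; Mask; Mask >>= 1)
    ++Bits;
  return Bits;
}

// InBits: scalar width of the source value. DemandedBits: mask of result
// bits that users actually read. SignBitKnownZero: whether known-bits
// analysis proved the source sign bit is zero.
static ExtendKind simplifySignExtend(unsigned InBits, uint64_t DemandedBits,
                                     bool SignBitKnownZero) {
  // No demanded bit lies above the source width, so the extended bits are
  // dead: ANY_EXTEND (or ANY_EXTEND_VECTOR_INREG) is enough.
  if (activeBits(DemandedBits) <= InBits)
    return ExtendKind::Any;
  // The source is known non-negative, so sign- and zero-extension produce
  // identical results, and the zero form is usually cheaper to lower.
  if (SignBitKnownZero)
    return ExtendKind::Zero;
  return ExtendKind::Sign;
}

int main() {
  // Only the low 16 result bits of a 16-bit-source sign extend demanded.
  std::cout << (simplifySignExtend(16, 0xFFFF, false) == ExtendKind::Any)
            << '\n'; // prints 1
  // All 64 result bits demanded, but the source sign bit is known zero.
  std::cout << (simplifySignExtend(32, ~0ULL, true) == ExtendKind::Zero)
            << '\n'; // prints 1
}

This is the effect visible in the x86 test diffs above: once the fold also
fires on the VECTOR_INREG nodes, the pmovsxdq sequences lower to cheaper
pmovzxdq/pshufd forms, because the upper bits of each widened lane are
either unused by pmuldq/vpmacsdql or known zero.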