From: Craig Topper
Date: Thu, 27 Dec 2018 03:37:04 +0000 (+0000)
Subject: [X86] Remove check that avoids creating PMULDQ with illegal types. Rely on SplitOpsAndApply to legalize it.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=718039ebb75d709b91dcc3ca18eddedb283892fd;p=llvm

[X86] Remove check that avoids creating PMULDQ with illegal types. Rely on SplitOpsAndApply to legalize it.

Create PMULDQ/PMULUDQ as long as the number of elements is a power of 2.

This seems to give some improvements in our ability to use SimplifyDemandedBits.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350084 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 5260afb7a7f..231e20cc02f 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -35067,7 +35067,8 @@ static SDValue combineMulToPMULDQ(SDNode *N, SelectionDAG &DAG,
 
   // Only support vXi64 vectors.
   if (!VT.isVector() || VT.getVectorElementType() != MVT::i64 ||
-      !DAG.getTargetLoweringInfo().isTypeLegal(VT))
+      VT.getVectorNumElements() < 2 ||
+      !isPowerOf2_32(VT.getVectorNumElements()))
     return SDValue();
 
   SDValue N0 = N->getOperand(0);
diff --git a/test/CodeGen/X86/mulvi32.ll b/test/CodeGen/X86/mulvi32.ll
index 6c6737a614b..fc185d18044 100644
--- a/test/CodeGen/X86/mulvi32.ll
+++ b/test/CodeGen/X86/mulvi32.ll
@@ -131,29 +131,24 @@ define <4 x i32> @_mul4xi32b(<4 x i32>, <4 x i32>) {
 define <4 x i64> @_mul4xi32toi64a(<4 x i32>, <4 x i32>) {
 ; SSE2-LABEL: _mul4xi32toi64a:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    pxor %xmm3, %xmm3
-; SSE2-NEXT:    movdqa %xmm0, %xmm4
-; SSE2-NEXT:    punpckhdq {{.*#+}} xmm4 = xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm3[2],xmm2[3],xmm3[3]
-; SSE2-NEXT:    pmuludq %xmm4, %xmm2
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSE2-NEXT:    pmuludq %xmm1, %xmm0
-; SSE2-NEXT:    movdqa %xmm2, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[0,1,1,3]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,1,3]
+; SSE2-NEXT:    pmuludq %xmm3, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,1,3,3]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,1,3,3]
+; SSE2-NEXT:    pmuludq %xmm3, %xmm1
+; SSE2-NEXT:    movdqa %xmm2, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: _mul4xi32toi64a:
 ; SSE42:       # %bb.0:
-; SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
-; SSE42-NEXT:    pmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero
-; SSE42-NEXT:    pmovzxdq {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero
-; SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE42-NEXT:    pmovzxdq {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero
 ; SSE42-NEXT:    pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
 ; SSE42-NEXT:    pmuludq %xmm3, %xmm2
-; SSE42-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
-; SSE42-NEXT:    pmuludq %xmm4, %xmm0
-; SSE42-NEXT:    movdqa %xmm2, %xmm1
+; SSE42-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,2,3,3]
+; SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
+; SSE42-NEXT:    pmuludq %xmm3, %xmm1
+; SSE42-NEXT:    movdqa %xmm2, %xmm0
 ; SSE42-NEXT:    retq
 ;
 ; AVX1-LABEL: _mul4xi32toi64a:
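
As an illustration of what the relaxed guard enables, here is a minimal IR sketch (not taken from this patch; the function name and values are hypothetical). On an SSE-only target, <8 x i64> is an illegal type, so the old isTypeLegal(VT) check blocked the combine entirely; the new guard only requires at least two elements and a power-of-2 element count, so PMULUDQ can be formed here and SplitOpsAndApply then splits it into legal 128-bit v2i64 pieces:

define <8 x i64> @mul8xi32toi64(<8 x i32> %a, <8 x i32> %b) {
  ; Both operands are zero-extended from i32, so only the low 32 bits of
  ; each i64 lane are significant -- exactly the pattern PMULUDQ computes.
  %ea = zext <8 x i32> %a to <8 x i64>
  %eb = zext <8 x i32> %b to <8 x i64>
  %m = mul <8 x i64> %ea, %eb
  ret <8 x i64> %m
}

The VT.getVectorNumElements() < 2 clause still rejects single-element vectors, which the 128-bit PMULDQ/PMULUDQ pattern cannot represent, and the power-of-2 requirement presumably keeps SplitOpsAndApply's splitting into legal-width chunks well-defined.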