From: Sanjay Patel Date: Sun, 23 Dec 2018 20:48:31 +0000 (+0000) Subject: [DAGCombiner] limit shuffle to extend transform (PR40146) X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=70157e398186d19c2d590efa9a76d98b117d4999;p=llvm [DAGCombiner] limit shuffle to extend transform (PR40146) It's dangerous to knowingly create an illegal vector type no matter what stage of combining we're in. This prevents the missed folding/scalarization seen in: https://bugs.llvm.org/show_bug.cgi?id=40146 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350034 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 93af1f4cc14..dda4f0176b1 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -17092,8 +17092,7 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, - bool LegalOperations, - bool LegalTypes) { + bool LegalOperations) { EVT VT = SVN->getValueType(0); bool IsBigEndian = DAG.getDataLayout().isBigEndian(); @@ -17129,7 +17128,9 @@ static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale); EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale); - if (!LegalTypes || TLI.isTypeLegal(OutVT)) + // Never create an illegal type. Only create unsupported operations if we + // are pre-legalization. + if (TLI.isTypeLegal(OutVT)) if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT)) return DAG.getBitcast(VT, @@ -17439,7 +17440,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return SDValue(N, 0); // Match shuffles that can be converted to any_vector_extend_in_reg. - if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes)) + if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations)) return V; // Combine "truncate_vector_in_reg" style shuffles. diff --git a/test/CodeGen/X86/vec_cast3.ll b/test/CodeGen/X86/vec_cast3.ll index fdf7c00d2ee..e4ff93a2281 100644 --- a/test/CodeGen/X86/vec_cast3.ll +++ b/test/CodeGen/X86/vec_cast3.ll @@ -240,86 +240,18 @@ define <2 x i32> @cvt_v2f32_v2u32(<2 x float> %src) { define <32 x i8> @PR40146(<4 x i64> %x) { ; CHECK-LABEL: PR40146: ; CHECK: ## %bb.0: -; CHECK-NEXT: vpextrd $2, %xmm0, %eax -; CHECK-NEXT: movzbl %ah, %ecx -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vpinsrb $0, %eax, %xmm1, %xmm2 -; CHECK-NEXT: vpinsrb $2, %ecx, %xmm2, %xmm2 -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: shrl $16, %ecx -; CHECK-NEXT: vpinsrb $4, %ecx, %xmm2, %xmm2 -; CHECK-NEXT: vpextrd $3, %xmm0, %ecx -; CHECK-NEXT: shrl $24, %eax -; CHECK-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; CHECK-NEXT: movzbl %ch, %eax -; CHECK-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2 -; CHECK-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; CHECK-NEXT: movl %ecx, %eax -; CHECK-NEXT: shrl $16, %eax -; CHECK-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; CHECK-NEXT: vmovd %xmm0, %eax -; CHECK-NEXT: shrl $24, %ecx -; CHECK-NEXT: vpinsrb $14, %ecx, %xmm2, %xmm2 -; CHECK-NEXT: movzbl %ah, %ecx -; CHECK-NEXT: vpinsrb $0, %eax, %xmm1, %xmm1 -; CHECK-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1 -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: shrl $16, %ecx -; CHECK-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1 -; CHECK-NEXT: vpextrd $1, %xmm0, %ecx -; CHECK-NEXT: shrl $24, %eax -; CHECK-NEXT: vpinsrb $6, %eax, %xmm1, %xmm0 -; CHECK-NEXT: movzbl %ch, %eax -; CHECK-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 -; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; CHECK-NEXT: movl %ecx, %eax -; CHECK-NEXT: shrl $16, %eax -; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; CHECK-NEXT: shrl $24, %ecx -; CHECK-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 +; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; CHECK-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; CHECK-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; CHECK-NEXT: retl ; ; CHECK-WIDE-LABEL: PR40146: ; CHECK-WIDE: ## %bb.0: -; CHECK-WIDE-NEXT: vpextrd $2, %xmm0, %eax -; CHECK-WIDE-NEXT: movzbl %ah, %ecx -; CHECK-WIDE-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; CHECK-WIDE-NEXT: vpinsrb $0, %eax, %xmm1, %xmm2 -; CHECK-WIDE-NEXT: vpinsrb $2, %ecx, %xmm2, %xmm2 -; CHECK-WIDE-NEXT: movl %eax, %ecx -; CHECK-WIDE-NEXT: shrl $16, %ecx -; CHECK-WIDE-NEXT: vpinsrb $4, %ecx, %xmm2, %xmm2 -; CHECK-WIDE-NEXT: vpextrd $3, %xmm0, %ecx -; CHECK-WIDE-NEXT: shrl $24, %eax -; CHECK-WIDE-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; CHECK-WIDE-NEXT: movzbl %ch, %eax -; CHECK-WIDE-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2 -; CHECK-WIDE-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; CHECK-WIDE-NEXT: movl %ecx, %eax -; CHECK-WIDE-NEXT: shrl $16, %eax -; CHECK-WIDE-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; CHECK-WIDE-NEXT: vmovd %xmm0, %eax -; CHECK-WIDE-NEXT: shrl $24, %ecx -; CHECK-WIDE-NEXT: vpinsrb $14, %ecx, %xmm2, %xmm2 -; CHECK-WIDE-NEXT: movzbl %ah, %ecx -; CHECK-WIDE-NEXT: vpinsrb $0, %eax, %xmm1, %xmm1 -; CHECK-WIDE-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1 -; CHECK-WIDE-NEXT: movl %eax, %ecx -; CHECK-WIDE-NEXT: shrl $16, %ecx -; CHECK-WIDE-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1 -; CHECK-WIDE-NEXT: vpextrd $1, %xmm0, %ecx -; CHECK-WIDE-NEXT: shrl $24, %eax -; CHECK-WIDE-NEXT: vpinsrb $6, %eax, %xmm1, %xmm0 -; CHECK-WIDE-NEXT: movzbl %ch, %eax -; CHECK-WIDE-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 -; CHECK-WIDE-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; CHECK-WIDE-NEXT: movl %ecx, %eax -; CHECK-WIDE-NEXT: shrl $16, %eax -; CHECK-WIDE-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; CHECK-WIDE-NEXT: shrl $24, %ecx -; CHECK-WIDE-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; CHECK-WIDE-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 +; CHECK-WIDE-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; CHECK-WIDE-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; CHECK-WIDE-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; CHECK-WIDE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; CHECK-WIDE-NEXT: retl %perm = shufflevector <4 x i64> %x, <4 x i64> undef, <4 x i32> %t1 = bitcast <4 x i64> %perm to <32 x i8>