From: Craig Topper Date: Mon, 28 Aug 2017 15:28:33 +0000 (+0000) Subject: [DAGCombiner] Teach visitEXTRACT_SUBVECTOR to turn extracts of BUILD_VECTOR into... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=337c2dfa0bd216d0c211e5f4ab44442e97c20e77;p=llvm [DAGCombiner] Teach visitEXTRACT_SUBVECTOR to turn extracts of BUILD_VECTOR into smaller BUILD_VECTORs Only do this before operations are legalized or BUILD_VECTOR is Legal for the target. Differential Revision: https://reviews.llvm.org/D37186 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@311892 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index df38ef423fa..306fd6f4984 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -15157,6 +15157,29 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // Skip bitcasting V = peekThroughBitcast(V); + // If the input is a build vector. Try to make a smaller build vector. + if (V->getOpcode() == ISD::BUILD_VECTOR) { + if (auto *Idx = dyn_cast(N->getOperand(1))) { + EVT InVT = V->getValueType(0); + unsigned NumElems = NVT.getSizeInBits() / InVT.getScalarSizeInBits(); + if (NumElems > 0) { + EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(), + InVT.getVectorElementType(), NumElems); + if (!LegalOperations || + TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT)) { + unsigned IdxVal = Idx->getZExtValue() * NVT.getScalarSizeInBits() / + InVT.getScalarSizeInBits(); + + // Extract the pieces from the original build_vector. + SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N), + makeArrayRef(V->op_begin() + IdxVal, + NumElems)); + return DAG.getBitcast(NVT, BuildVec); + } + } + } + } + if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { // Handle only simple case where vector being inserted and vector // being extracted are of same size. 
diff --git a/test/CodeGen/X86/fold-vector-sext-zext.ll b/test/CodeGen/X86/fold-vector-sext-zext.ll index 575bd5897e4..39e728816b0 100644 --- a/test/CodeGen/X86/fold-vector-sext-zext.ll +++ b/test/CodeGen/X86/fold-vector-sext-zext.ll @@ -83,8 +83,7 @@ define <4 x i32> @test_sext_4i8_4i32_undef() { define <4 x i64> @test_sext_4i8_4i64() { ; X32-LABEL: test_sext_4i8_4i64: ; X32: # BB#0: -; X32-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,4294967295,4294967295] -; X32-NEXT: vinsertf128 $1, {{\.LCPI.*}}, %ymm0, %ymm0 +; X32-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,4294967295,4294967295,2,0,4294967293,4294967295] ; X32-NEXT: retl ; ; X64-LABEL: test_sext_4i8_4i64: @@ -102,8 +101,7 @@ define <4 x i64> @test_sext_4i8_4i64() { define <4 x i64> @test_sext_4i8_4i64_undef() { ; X32-LABEL: test_sext_4i8_4i64_undef: ; X32: # BB#0: -; X32-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; X32-NEXT: vinsertf128 $1, {{\.LCPI.*}}, %ymm0, %ymm0 +; X32-NEXT: vmovaps {{.*#+}} ymm0 = ; X32-NEXT: retl ; ; X64-LABEL: test_sext_4i8_4i64_undef: @@ -245,8 +243,7 @@ define <4 x i32> @test_zext_4i8_4i32() { define <4 x i64> @test_zext_4i8_4i64() { ; X32-LABEL: test_zext_4i8_4i64: ; X32: # BB#0: -; X32-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,255,0] -; X32-NEXT: vinsertf128 $1, {{\.LCPI.*}}, %ymm0, %ymm0 +; X32-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,255,0,2,0,253,0] ; X32-NEXT: retl ; ; X64-LABEL: test_zext_4i8_4i64: @@ -300,10 +297,7 @@ define <4 x i32> @test_zext_4i8_4i32_undef() { define <4 x i64> @test_zext_4i8_4i64_undef() { ; X32-LABEL: test_zext_4i8_4i64_undef: ; X32: # BB#0: -; X32-NEXT: vmovaps {{.*#+}} xmm0 = -; X32-NEXT: movl $2, %eax -; X32-NEXT: vmovd %eax, %xmm1 -; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; X32-NEXT: vmovaps {{.*#+}} ymm0 = ; X32-NEXT: retl ; ; X64-LABEL: test_zext_4i8_4i64_undef: diff --git a/test/CodeGen/X86/widen_extract-1.ll b/test/CodeGen/X86/widen_extract-1.ll index 3737ea96773..332dc940b17 100644 --- a/test/CodeGen/X86/widen_extract-1.ll +++ b/test/CodeGen/X86/widen_extract-1.ll @@ -7,8 
+7,8 @@ define void @convert(<2 x double>* %dst.addr, <3 x double> %src) { ; X32-LABEL: convert: ; X32: # BB#0: # %entry -; X32-NEXT: movups {{[0-9]+}}(%esp), %xmm0 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movups {{[0-9]+}}(%esp), %xmm0 ; X32-NEXT: movaps %xmm0, (%eax) ; X32-NEXT: retl ;