From dfe9b148cbcc50fb5a031cb0609520ffbfa9143d Mon Sep 17 00:00:00 2001 From: Nirav Dave Date: Fri, 11 Aug 2017 13:21:41 +0000 Subject: [PATCH] Improve handling of insert_subvector of bitcast values Fix insert_subvector / extract_subvector merges of bitcast values. Reviewers: efriedma, craig.topper, RKSimon Subscribers: RKSimon, llvm-commits Differential Revision: https://reviews.llvm.org/D34571 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@310711 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 35 ++++++++++++++++++++++ test/CodeGen/X86/MergeConsecutiveStores.ll | 28 +++++------------ test/CodeGen/X86/vector-shuffle-256-v4.ll | 2 +- 3 files changed, 44 insertions(+), 21 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a247f2656be..2aa8e6382e6 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -15890,12 +15890,47 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { if (N1.isUndef()) return N0; + // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow + // us to pull BITCASTs from input to output. + if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR) + if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode())) + return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2); + // If this is an insert of an extracted vector into an undef vector, we can // just use the input to the extract. if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT) return N1.getOperand(0); + // If we are inserting a bitcast value into an undef, with the same + // number of elements, just use the bitcast input of the extract. + // i.e. 
INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 -> + // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2) + if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST && + N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR && + N1.getOperand(0).getOperand(1) == N2 && + N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() == + VT.getVectorNumElements()) { + return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0)); + } + + // If both N0 and N1 are bitcast values on which insert_subvector + // would make sense, pull the bitcast through. + // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 -> + // BITCAST (INSERT_SUBVECTOR N0 N1 N2) + if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) { + SDValue CN0 = N0.getOperand(0); + SDValue CN1 = N1.getOperand(0); + if (CN0.getValueType().getVectorElementType() == + CN1.getValueType().getVectorElementType() && + CN0.getValueType().getVectorNumElements() == + VT.getVectorNumElements()) { + SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), + CN0.getValueType(), CN0, CN1, N2); + return DAG.getBitcast(VT, NewINSERT); + } + } + // Combine INSERT_SUBVECTORs where we are inserting to the same index. 
// INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx ) // --> INSERT_SUBVECTOR( Vec, SubNew, Idx ) diff --git a/test/CodeGen/X86/MergeConsecutiveStores.ll b/test/CodeGen/X86/MergeConsecutiveStores.ll index 608b92da514..69f5f4c7a05 100644 --- a/test/CodeGen/X86/MergeConsecutiveStores.ll +++ b/test/CodeGen/X86/MergeConsecutiveStores.ll @@ -492,15 +492,10 @@ define void @merge_vec_element_store(<8 x float> %v, float* %ptr) { store float %vecext7, float* %arrayidx7, align 4 ret void -; CHECK: vextractf128 $1, %ymm0, %xmm1 -; CHECK: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; CHECK-LABEL: merge_vec_element_store +; CHECK: vmovups %ymm0, (%rdi) +; CHECK: vzeroupper ; CHECK: retq - -; This is what should be generated: -; FIXME-LABEL: merge_vec_element_store -; FIXME: vmovups -; FIXME-NEXT: vzeroupper -; FIXME-NEXT: retq } ; PR21711 - Merge vector stores into wider vector stores. @@ -520,18 +515,11 @@ define void @merge_vec_extract_stores(<8 x float> %v1, <8 x float> %v2, <4 x flo store <4 x float> %shuffle3, <4 x float>* %idx3, align 16 ret void -; These vblendpd are obviously redundant. -; CHECK: vblendpd $12, %ymm0, %ymm0, %ymm0 # ymm0 = ymm0[0,1,2,3] -; CHECK: vmovupd %ymm0, 48(%rdi) -; CHECK: vblendpd $12, %ymm1, %ymm1, %ymm0 # ymm0 = ymm1[0,1,2,3] -; CHECK: vmovupd %ymm0, 80(%rdi) - -; This is what should be generated: -; FIXME-LABEL: merge_vec_extract_stores -; FIXME: vmovups %ymm0, 48(%rdi) -; FIXME-NEXT: vmovups %ymm1, 80(%rdi) -; FIXME-NEXT: vzeroupper -; FIXME-NEXT: retq +; CHECK-LABEL: merge_vec_extract_stores +; CHECK: vmovups %ymm0, 48(%rdi) +; CHECK-NEXT: vmovups %ymm1, 80(%rdi) +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq } ; Merging vector stores when sourced from vector loads. 
diff --git a/test/CodeGen/X86/vector-shuffle-256-v4.ll b/test/CodeGen/X86/vector-shuffle-256-v4.ll index 27bc2bb1682..8c84580120f 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v4.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v4.ll @@ -807,10 +807,10 @@ define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) { define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) { ; AVX1-LABEL: shuffle_v4i64_0412: ; AVX1: # BB#0: +; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm0[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7] ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0] ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3] ; AVX1-NEXT: retq ; -- 2.50.1