From: Simon Pilgrim
Date: Thu, 31 Jan 2019 11:15:05 +0000 (+0000)
Subject: [X86][AVX] Fold vt1 concat_vectors(vt2 undef, vt2 broadcast(x)) --> vt1 broadcast(x)
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=611e5e46319aee436e16601bffe41f579ee5ebac;p=llvm

[X86][AVX] Fold vt1 concat_vectors(vt2 undef, vt2 broadcast(x)) --> vt1 broadcast(x)

If we're not inserting the broadcast into the lowest subvector, then we can avoid the insertion by just performing a larger broadcast.

Avoids a regression when we enable AVX1 broadcasts in shuffle combining.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352742 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 39fdafede7f..8412c95ce4e 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -41593,6 +41593,7 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
 
   // If this is subv_broadcast insert into both halves, use a larger
   // subv_broadcast.
+  // TODO - handle X86ISD::VBROADCAST as well?
   if (SubVec.getOpcode() == X86ISD::SUBV_BROADCAST && SubVec == SubVec2)
     return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT,
                        SubVec.getOperand(0));
@@ -41614,11 +41615,14 @@
                           SubVec2, Vec.getOperand(2));
       return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Vec, SubVec,
                          N->getOperand(2));
-
       }
     }
   }
 
+  // If this is a broadcast insert into an upper undef, use a larger broadcast.
+  if (Vec.isUndef() && IdxVal != 0 && SubVec.getOpcode() == X86ISD::VBROADCAST)
+    return DAG.getNode(X86ISD::VBROADCAST, dl, OpVT, SubVec.getOperand(0));
+
   return SDValue();
 }
diff --git a/test/CodeGen/X86/insert-into-constant-vector.ll b/test/CodeGen/X86/insert-into-constant-vector.ll
index 9d95f98a57b..ea4c4f1a2d5 100644
--- a/test/CodeGen/X86/insert-into-constant-vector.ll
+++ b/test/CodeGen/X86/insert-into-constant-vector.ll
@@ -289,31 +289,27 @@ define <8 x i32> @elt7_v8i32(i32 %x) {
 ;
 ; X32AVX2-LABEL: elt7_v8i32:
 ; X32AVX2:       # %bb.0:
-; X32AVX2-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %xmm0
-; X32AVX2-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32AVX2-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
 ; X32AVX2-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
 ; X32AVX2-NEXT:    retl
 ;
 ; X64AVX2-LABEL: elt7_v8i32:
 ; X64AVX2:       # %bb.0:
 ; X64AVX2-NEXT:    vmovd %edi, %xmm0
-; X64AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
-; X64AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X64AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
 ; X64AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
 ; X64AVX2-NEXT:    retq
 ;
 ; X32AVX512F-LABEL: elt7_v8i32:
 ; X32AVX512F:       # %bb.0:
-; X32AVX512F-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %xmm0
-; X32AVX512F-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32AVX512F-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
 ; X32AVX512F-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
 ; X32AVX512F-NEXT:    retl
 ;
 ; X64AVX512F-LABEL: elt7_v8i32:
 ; X64AVX512F:       # %bb.0:
 ; X64AVX512F-NEXT:    vmovd %edi, %xmm0
-; X64AVX512F-NEXT:    vpbroadcastd %xmm0, %xmm0
-; X64AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X64AVX512F-NEXT:    vpbroadcastd %xmm0, %ymm0
 ; X64AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
 ; X64AVX512F-NEXT:    retq
  %ins = insertelement <8 x i32> , i32 %x, i32 7
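
Note: for readers outside the X86 backend, the sketch below models the new combine in isolation: a narrow broadcast inserted at a non-zero subvector index of an undef vector is replaced by one wider broadcast of the same scalar, so the vinsert[if]128 disappears. This is a minimal standalone toy, not the real SelectionDAG API; ToyNode, Op, NodeRef and foldBroadcastIntoUpperUndef are hypothetical stand-ins for SDValue, the X86ISD opcodes and combineInsertSubvector.

    // Standalone sketch (not LLVM code): models the fold added above with
    // hypothetical stand-in types instead of SDValue / X86ISD opcodes.
    #include <cassert>
    #include <cstdio>
    #include <memory>
    #include <vector>

    enum class Op { Undef, Vbroadcast, Other };

    struct ToyNode {
      Op Opcode = Op::Other;
      std::vector<std::shared_ptr<ToyNode>> Ops; // operands
      unsigned NumElts = 0;                      // vector width in elements
    };
    using NodeRef = std::shared_ptr<ToyNode>;

    // Models insert_subvector(Vec, SubVec, IdxVal) at a wide type of WideElts
    // elements. When Vec is undef, the index is non-zero (an upper subvector)
    // and SubVec is a broadcast, rebroadcast the scalar at the wide type:
    // every lane holds the same value anyway, so no insertion is needed.
    NodeRef foldBroadcastIntoUpperUndef(const NodeRef &Vec, const NodeRef &SubVec,
                                        unsigned IdxVal, unsigned WideElts) {
      if (Vec->Opcode == Op::Undef && IdxVal != 0 &&
          SubVec->Opcode == Op::Vbroadcast) {
        auto Wide = std::make_shared<ToyNode>();
        Wide->Opcode = Op::Vbroadcast;
        Wide->Ops = {SubVec->Ops[0]}; // broadcast the original scalar
        Wide->NumElts = WideElts;
        return Wide;
      }
      return nullptr; // no fold applies
    }

    int main() {
      auto Scalar = std::make_shared<ToyNode>(); // the scalar x

      auto Bcast = std::make_shared<ToyNode>();  // v4i32 broadcast(x)
      Bcast->Opcode = Op::Vbroadcast;
      Bcast->Ops = {Scalar};
      Bcast->NumElts = 4;

      auto Undef = std::make_shared<ToyNode>();  // v8i32 undef
      Undef->Opcode = Op::Undef;
      Undef->NumElts = 8;

      // Inserting the v4i32 broadcast into the upper half (IdxVal = 4) of the
      // v8i32 undef folds to a single v8i32 broadcast of x.
      NodeRef Folded = foldBroadcastIntoUpperUndef(Undef, Bcast, /*IdxVal=*/4, 8);
      assert(Folded && Folded->Opcode == Op::Vbroadcast && Folded->NumElts == 8);
      std::printf("folded to v%u broadcast\n", Folded->NumElts);
      return 0;
    }

This is exactly the shape of the elt7_v8i32 codegen change in the test diff above: the vpbroadcastd %xmm0, %xmm0 plus vinserti128 pair becomes a single vpbroadcastd %xmm0, %ymm0.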