From 7e2bbade378b432ef2271cd6ab0948566201196d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 31 Jan 2019 17:48:35 +0000 Subject: [PATCH] [X86][AVX] Fold concat(broadcast(x),broadcast(x)) -> broadcast(x) Differential Revision: https://reviews.llvm.org/D57514 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352774 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 11 ++-- test/CodeGen/X86/subvector-broadcast.ll | 79 +++++++------------------ test/CodeGen/X86/widened-broadcast.ll | 22 ++----- 3 files changed, 30 insertions(+), 82 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index da479eb692c..18240ef0014 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -41651,12 +41651,11 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) && Vec.hasOneUse()) return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec); - // If this is subv_broadcast insert into both halves, use a larger - // subv_broadcast. - // TODO - handle X86ISD::VBROADCAST as well? - if (SubVec.getOpcode() == X86ISD::SUBV_BROADCAST && SubVec == SubVec2) - return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, - SubVec.getOperand(0)); + // If this broadcast/subv_broadcast is inserted into both halves, use a + // larger broadcast/subv_broadcast. + if (SubVec == SubVec2 && (SubVec.getOpcode() == X86ISD::VBROADCAST || + SubVec.getOpcode() == X86ISD::SUBV_BROADCAST)) + return DAG.getNode(SubVec.getOpcode(), dl, OpVT, SubVec.getOperand(0)); // If we're inserting all zeros into the upper half, change this to // an insert into an all zeros vector. 
We will match this to a move diff --git a/test/CodeGen/X86/subvector-broadcast.ll b/test/CodeGen/X86/subvector-broadcast.ll index 066e4dc016a..c0ab9ac47c9 100644 --- a/test/CodeGen/X86/subvector-broadcast.ll +++ b/test/CodeGen/X86/subvector-broadcast.ll @@ -1582,60 +1582,28 @@ define <4 x i32> @test_2xi32_to_4xi32_mem(<2 x i32>* %vp) { } define <8 x i32> @test_2xi32_to_8xi32_mem(<2 x i32>* %vp) { -; X32-AVX1-LABEL: test_2xi32_to_8xi32_mem: -; X32-AVX1: # %bb.0: -; X32-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] -; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; X32-AVX1-NEXT: retl -; -; X32-AVX2-LABEL: test_2xi32_to_8xi32_mem: -; X32-AVX2: # %bb.0: -; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX2-NEXT: vbroadcastsd (%eax), %ymm0 -; X32-AVX2-NEXT: retl -; -; X32-AVX512-LABEL: test_2xi32_to_8xi32_mem: -; X32-AVX512: # %bb.0: -; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX512-NEXT: vbroadcastsd (%eax), %ymm0 -; X32-AVX512-NEXT: retl -; -; X64-AVX1-LABEL: test_2xi32_to_8xi32_mem: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] -; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: test_2xi32_to_8xi32_mem: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vbroadcastsd (%rdi), %ymm0 -; X64-AVX2-NEXT: retq +; X32-LABEL: test_2xi32_to_8xi32_mem: +; X32: # %bb.0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: vbroadcastsd (%eax), %ymm0 +; X32-NEXT: retl ; -; X64-AVX512-LABEL: test_2xi32_to_8xi32_mem: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vbroadcastsd (%rdi), %ymm0 -; X64-AVX512-NEXT: retq +; X64-LABEL: test_2xi32_to_8xi32_mem: +; X64: # %bb.0: +; X64-NEXT: vbroadcastsd (%rdi), %ymm0 +; X64-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> ret <8 x i32> %res } define <16 x i32> @test_2xi32_to_16xi32_mem(<2 x i32>* %vp) { -; X32-AVX1-LABEL: test_2xi32_to_16xi32_mem: -; 
X32-AVX1: # %bb.0: -; X32-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] -; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; X32-AVX1-NEXT: vmovaps %ymm0, %ymm1 -; X32-AVX1-NEXT: retl -; -; X32-AVX2-LABEL: test_2xi32_to_16xi32_mem: -; X32-AVX2: # %bb.0: -; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX2-NEXT: vbroadcastsd (%eax), %ymm0 -; X32-AVX2-NEXT: vmovaps %ymm0, %ymm1 -; X32-AVX2-NEXT: retl +; X32-AVX-LABEL: test_2xi32_to_16xi32_mem: +; X32-AVX: # %bb.0: +; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-AVX-NEXT: vbroadcastsd (%eax), %ymm0 +; X32-AVX-NEXT: vmovaps %ymm0, %ymm1 +; X32-AVX-NEXT: retl ; ; X32-AVX512-LABEL: test_2xi32_to_16xi32_mem: ; X32-AVX512: # %bb.0: @@ -1645,18 +1613,11 @@ define <16 x i32> @test_2xi32_to_16xi32_mem(<2 x i32>* %vp) { ; X32-AVX512-NEXT: vpermd %zmm0, %zmm1, %zmm0 ; X32-AVX512-NEXT: retl ; -; X64-AVX1-LABEL: test_2xi32_to_16xi32_mem: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] -; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; X64-AVX1-NEXT: vmovaps %ymm0, %ymm1 -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: test_2xi32_to_16xi32_mem: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vbroadcastsd (%rdi), %ymm0 -; X64-AVX2-NEXT: vmovaps %ymm0, %ymm1 -; X64-AVX2-NEXT: retq +; X64-AVX-LABEL: test_2xi32_to_16xi32_mem: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vbroadcastsd (%rdi), %ymm0 +; X64-AVX-NEXT: vmovaps %ymm0, %ymm1 +; X64-AVX-NEXT: retq ; ; X64-AVX512-LABEL: test_2xi32_to_16xi32_mem: ; X64-AVX512: # %bb.0: diff --git a/test/CodeGen/X86/widened-broadcast.ll b/test/CodeGen/X86/widened-broadcast.ll index c192ee6fd28..ecbeb532f27 100644 --- a/test/CodeGen/X86/widened-broadcast.ll +++ b/test/CodeGen/X86/widened-broadcast.ll @@ -607,21 +607,10 @@ define <8 x i32> @load_splat_8i32_2i32_0101(<2 x i32>* %vp) { ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: retq ; -; AVX1-LABEL: load_splat_8i32_2i32_0101: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovddup {{.*#+}} xmm0 
= mem[0,0] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: load_splat_8i32_2i32_0101: -; AVX2: # %bb.0: -; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0 -; AVX2-NEXT: retq -; -; AVX512-LABEL: load_splat_8i32_2i32_0101: -; AVX512: # %bb.0: -; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0 -; AVX512-NEXT: retq +; AVX-LABEL: load_splat_8i32_2i32_0101: +; AVX: # %bb.0: +; AVX-NEXT: vbroadcastsd (%rdi), %ymm0 +; AVX-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> ret <8 x i32> %res @@ -639,8 +628,7 @@ define <16 x i32> @load_splat_16i32_2i32_0101(<2 x i32>* %vp) { ; ; AVX1-LABEL: load_splat_16i32_2i32_0101: ; AVX1: # %bb.0: -; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0 ; AVX1-NEXT: vmovaps %ymm0, %ymm1 ; AVX1-NEXT: retq ; -- 2.40.0