From: Simon Pilgrim Date: Thu, 31 Jan 2019 14:04:07 +0000 (+0000) Subject: [X86][AVX] Fold broadcast(bitcast(src)) -> bitcast(broadcast(src)) X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d568dc2c8f20cafbfb3125578615638f6d2c6bca;p=llvm [X86][AVX] Fold broadcast(bitcast(src)) -> bitcast(broadcast(src)) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352751 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index cb5dce233f7..9bc3d482074 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -31938,6 +31938,14 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, return DAG.getNode(X86ISD::VBROADCAST, DL, VT, DAG.getBitcast(SrcVT, Res)); } + // broadcast(bitcast(src)) -> bitcast(broadcast(src)) + // 32-bit targets have to bitcast i64 to f64, so better to bitcast upward. + if (Src.getOpcode() == ISD::BITCAST && + SrcVT.getScalarSizeInBits() == BCVT.getScalarSizeInBits()) { + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), BCVT.getScalarType(), + VT.getVectorNumElements()); + return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC)); + } return SDValue(); } case X86ISD::PSHUFD: diff --git a/test/CodeGen/X86/avx-vbroadcast.ll b/test/CodeGen/X86/avx-vbroadcast.ll index ccb054dce43..9346c944eb8 100644 --- a/test/CodeGen/X86/avx-vbroadcast.ll +++ b/test/CodeGen/X86/avx-vbroadcast.ll @@ -45,9 +45,9 @@ define <4 x i64> @A2(i64* %ptr, i64* %ptr2) nounwind uwtable readnone ssp { ; X64-LABEL: A2: ; X64: ## %bb.0: ## %entry ; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: vmovq %rax, %xmm0 ; X64-NEXT: movq %rax, (%rsi) -; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] +; X64-NEXT: vmovq %rax, %xmm0 +; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] ; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X64-NEXT: retq entry: @@ -110,8 +110,8 @@ define <8 x i32> @B3(i32* %ptr, i32* %ptr2) nounwind uwtable readnone ssp { ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl (%ecx), %ecx -; X32-NEXT: vmovd %ecx, %xmm0 ; X32-NEXT: movl %ecx, (%eax) +; X32-NEXT: vmovd %ecx, %xmm0 ; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X32-NEXT: retl @@ -119,8 +119,8 @@ define <8 x i32> @B3(i32* %ptr, i32* %ptr2) nounwind uwtable readnone ssp { ; X64-LABEL: B3: ; X64: ## %bb.0: ## %entry ; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: vmovd %eax, %xmm0 ; X64-NEXT: movl %eax, (%rsi) +; X64-NEXT: vmovd %eax, %xmm0 ; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X64-NEXT: retq diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll index b333e9109bd..9d4cfcefd64 100644 --- a/test/CodeGen/X86/avx2-vbroadcast.ll +++ b/test/CodeGen/X86/avx2-vbroadcast.ll @@ -244,8 +244,7 @@ define <16 x i16> @broadcast_mem_v4i16_v16i16(<4 x i16>* %ptr) { ; X32-LABEL: broadcast_mem_v4i16_v16i16: ; X32: ## %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X32-NEXT: vbroadcastsd %xmm0, %ymm0 +; X32-NEXT: vbroadcastsd (%eax), %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: broadcast_mem_v4i16_v16i16: diff --git a/test/CodeGen/X86/subvector-broadcast.ll b/test/CodeGen/X86/subvector-broadcast.ll index f72d9ba7125..066e4dc016a 100644 --- a/test/CodeGen/X86/subvector-broadcast.ll +++ b/test/CodeGen/X86/subvector-broadcast.ll @@ -1592,15 +1592,13 @@ define <8 x i32> @test_2xi32_to_8xi32_mem(<2 x i32>* %vp) { ; X32-AVX2-LABEL: test_2xi32_to_8xi32_mem: ; X32-AVX2: # %bb.0: ; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X32-AVX2-NEXT: vbroadcastsd %xmm0, %ymm0 +; X32-AVX2-NEXT: vbroadcastsd (%eax), %ymm0 ; X32-AVX2-NEXT: retl ; ; X32-AVX512-LABEL: test_2xi32_to_8xi32_mem: ; X32-AVX512: # %bb.0: ; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X32-AVX512-NEXT: vbroadcastsd %xmm0, %ymm0 +; X32-AVX512-NEXT: vbroadcastsd (%eax), %ymm0 ; X32-AVX512-NEXT: retl ; ; X64-AVX1-LABEL: test_2xi32_to_8xi32_mem: @@ -1635,8 +1633,7 @@ define <16 x i32> @test_2xi32_to_16xi32_mem(<2 x i32>* %vp) { ; X32-AVX2-LABEL: test_2xi32_to_16xi32_mem: ; X32-AVX2: # %bb.0: ; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X32-AVX2-NEXT: vbroadcastsd %xmm0, %ymm0 +; X32-AVX2-NEXT: vbroadcastsd (%eax), %ymm0 ; X32-AVX2-NEXT: vmovaps %ymm0, %ymm1 ; X32-AVX2-NEXT: retl ;