From: Simon Pilgrim
Date: Tue, 5 Feb 2019 17:02:49 +0000 (+0000)
Subject: [X86][AVX] Attempt to combine shuffles to subvector broadcast load
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=eb26afba0714839bd53169cb3dc7d31680e83b30;p=llvm

[X86][AVX] Attempt to combine shuffles to subvector broadcast load

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@353189 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b55db2b3efa..2cfc931a22a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -31070,6 +31070,24 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
   bool IsEVEXShuffle =
       RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128);
 
+  // Attempt to match a subvector broadcast.
+  // shuffle(insert_subvector(undef, sub, 0), undef, 0, 0, 0, 0)
+  if (UnaryShuffle &&
+      (BaseMaskEltSizeInBits == 128 || BaseMaskEltSizeInBits == 256)) {
+    SmallVector<int, 64> BroadcastMask(NumBaseMaskElts, 0);
+    if (isTargetShuffleEquivalent(BaseMask, BroadcastMask)) {
+      SDValue Src = Inputs[0];
+      if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
+          Src.getOperand(0).isUndef() &&
+          Src.getOperand(1).getValueSizeInBits() == BaseMaskEltSizeInBits &&
+          MayFoldLoad(Src.getOperand(1)) && isNullConstant(Src.getOperand(2))) {
+        return DAG.getBitcast(RootVT, DAG.getNode(X86ISD::SUBV_BROADCAST, DL,
+                                                  Src.getValueType(),
+                                                  Src.getOperand(1)));
+      }
+    }
+  }
+
   // TODO - handle 128/256-bit lane shuffles of 512-bit vectors.
 
   // Handle 128-bit lane shuffles of 256-bit vectors.
diff --git a/test/CodeGen/X86/subvector-broadcast.ll b/test/CodeGen/X86/subvector-broadcast.ll
index c2aeb071566..0e66f60dbc5 100644
--- a/test/CodeGen/X86/subvector-broadcast.ll
+++ b/test/CodeGen/X86/subvector-broadcast.ll
@@ -1726,8 +1726,7 @@ define <8 x double> @broadcast_v8f64_v2f64_u1u10101(<2 x double>* %vp) {
 ; X32-AVX512-LABEL: broadcast_v8f64_v2f64_u1u10101:
 ; X32-AVX512: # %bb.0:
 ; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512-NEXT: vmovapd (%eax), %xmm0
-; X32-AVX512-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
+; X32-AVX512-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
 ; X32-AVX512-NEXT: retl
 ;
 ; X64-AVX1-LABEL: broadcast_v8f64_v2f64_u1u10101:
@@ -1745,8 +1744,7 @@ define <8 x double> @broadcast_v8f64_v2f64_u1u10101(<2 x double>* %vp) {
 ;
 ; X64-AVX512-LABEL: broadcast_v8f64_v2f64_u1u10101:
 ; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovapd (%rdi), %xmm0
-; X64-AVX512-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
+; X64-AVX512-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
 ; X64-AVX512-NEXT: retq
   %vec = load <2 x double>, <2 x double>* %vp
   %res = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 undef, i32 1, i32 undef, i32 1, i32 0, i32 1, i32 0, i32 1>