From 3b4235d52814d508d4879e83201cad0c11aac3ea Mon Sep 17 00:00:00 2001
From: Hans Wennborg
Date: Tue, 6 Aug 2019 08:08:11 +0000
Subject: [PATCH] Merging r366660 and r367306:

------------------------------------------------------------------------
r366660 | rksimon | 2019-07-21 21:04:44 +0200 (Sun, 21 Jul 2019) | 3 lines

[X86] SimplifyDemandedVectorEltsForTargetNode - Move SUBV_BROADCAST narrowing handling. NFCI.

Move the narrowing of SUBV_BROADCAST to where we handle all the other opcodes.
------------------------------------------------------------------------

------------------------------------------------------------------------
r367306 | rksimon | 2019-07-30 13:35:13 +0200 (Tue, 30 Jul 2019) | 5 lines

[X86][AVX] SimplifyDemandedVectorElts - handle extraction from X86ISD::SUBV_BROADCAST source (PR42819)

PR42819 showed an issue that we couldn't handle the case where we demanded a 'sub-sub-vector' of the SUBV_BROADCAST 'sub-vector' source.

This patch recognizes these cases and extracts the sub-sub-vector instead of trying to broadcast to a type smaller than the 'sub-vector' source.
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_90@367991 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 34 +++++++++++++-----------------
 test/CodeGen/X86/oddsubvector.ll   | 32 ++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 19 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 0b4bf687e6c..a03ce798858 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -34062,25 +34062,6 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
       return true;
     break;
   }
-  case X86ISD::SUBV_BROADCAST: {
-    // Reduce size of broadcast if we don't need the upper half.
-    unsigned HalfElts = NumElts / 2;
-    if (DemandedElts.extractBits(HalfElts, HalfElts).isNullValue()) {
-      SDValue Src = Op.getOperand(0);
-      MVT SrcVT = Src.getSimpleValueType();
-
-      SDValue Half = Src;
-      if (SrcVT.getVectorNumElements() != HalfElts) {
-        MVT HalfVT = MVT::getVectorVT(SrcVT.getScalarType(), HalfElts);
-        Half = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, SDLoc(Op), HalfVT, Src);
-      }
-
-      return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Half, 0,
-                                               TLO.DAG, SDLoc(Op),
-                                               Half.getValueSizeInBits()));
-    }
-    break;
-  }
   case X86ISD::VPERMV: {
     SDValue Mask = Op.getOperand(0);
     APInt MaskUndef, MaskZero;
@@ -34134,6 +34115,21 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
       SDValue Insert =
           insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
       return TLO.CombineTo(Op, Insert);
+    }
+    // Subvector broadcast.
+    case X86ISD::SUBV_BROADCAST: {
+      SDLoc DL(Op);
+      SDValue Src = Op.getOperand(0);
+      if (Src.getValueSizeInBits() > ExtSizeInBits)
+        Src = extractSubVector(Src, 0, TLO.DAG, DL, ExtSizeInBits);
+      else if (Src.getValueSizeInBits() < ExtSizeInBits) {
+        MVT SrcSVT = Src.getSimpleValueType().getScalarType();
+        MVT SrcVT =
+            MVT::getVectorVT(SrcSVT, ExtSizeInBits / SrcSVT.getSizeInBits());
+        Src = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, DL, SrcVT, Src);
+      }
+      return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Src, 0,
+                                               TLO.DAG, DL, ExtSizeInBits));
     }
     // Byte shifts by immediate.
     case X86ISD::VSHLDQ:
diff --git a/test/CodeGen/X86/oddsubvector.ll b/test/CodeGen/X86/oddsubvector.ll
index 9bc6c0f380a..bb384e118dc 100644
--- a/test/CodeGen/X86/oddsubvector.ll
+++ b/test/CodeGen/X86/oddsubvector.ll
@@ -158,3 +158,35 @@ define void @PR40815(%struct.Mat4* nocapture readonly dereferenceable(64), %stru
   store <4 x float> %5, <4 x float>* %13, align 16
   ret void
 }
+
+define <16 x i32> @PR42819(<8 x i32>* %a0) {
+; SSE-LABEL: PR42819:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqu (%rdi), %xmm3
+; SSE-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7,8,9,10,11]
+; SSE-NEXT:    xorps %xmm0, %xmm0
+; SSE-NEXT:    xorps %xmm1, %xmm1
+; SSE-NEXT:    xorps %xmm2, %xmm2
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: PR42819:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,0,1,2]
+; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm0[5,6,7]
+; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: PR42819:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovdqu (%rdi), %xmm0
+; AVX512-NEXT:    movw $-8192, %ax # imm = 0xE000
+; AVX512-NEXT:    kmovw %eax, %k1
+; AVX512-NEXT:    vpexpandd %zmm0, %zmm0 {%k1} {z}
+; AVX512-NEXT:    retq
+  %1 = load <8 x i32>, <8 x i32>* %a0, align 4
+  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %3 = shufflevector <16 x i32> zeroinitializer, <16 x i32> %2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 16, i32 17, i32 18>
+  ret <16 x i32> %3
+}
-- 
2.40.0
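
A note on the r367306 half of this merge: the new X86ISD::SUBV_BROADCAST case reduces to a width comparison between the broadcast's source subvector and the demanded low bits, choosing between extracting a sub-sub-vector (the PR42819 case), re-broadcasting at a narrower width, or reusing the source as-is. The following is a minimal, illustrative-only C++ sketch of that decision, not part of the patch; narrowSubvBroadcast, SrcBits, ExtBits and the Narrow enum are hypothetical names, not LLVM APIs.

// Illustrative-only sketch of the size logic r367306 adds for SUBV_BROADCAST.
// narrowSubvBroadcast/SrcBits/ExtBits/Narrow are hypothetical, not LLVM APIs.
#include <cassert>
#include <cstdio>

enum class Narrow { ExtractSubSubVector, RebroadcastNarrower, ReuseSource };

// SrcBits: width of the broadcast's source subvector.
// ExtBits: width of the low sub-sub-vector actually demanded.
Narrow narrowSubvBroadcast(unsigned SrcBits, unsigned ExtBits) {
  assert(SrcBits % 128 == 0 && ExtBits % 128 == 0 && "whole subvectors only");
  if (SrcBits > ExtBits)
    return Narrow::ExtractSubSubVector; // PR42819: demanded width < source width
  if (SrcBits < ExtBits)
    return Narrow::RebroadcastNarrower; // re-broadcast source at demanded width
  return Narrow::ReuseSource;           // source already matches demanded width
}

int main() {
  // 256-bit source broadcast, only a 128-bit sub-sub-vector demanded:
  // extract it rather than broadcasting to a type smaller than the source.
  assert(narrowSubvBroadcast(256, 128) == Narrow::ExtractSubSubVector);
  // 128-bit source, low 256 bits of a 512-bit broadcast demanded: re-broadcast.
  assert(narrowSubvBroadcast(128, 256) == Narrow::RebroadcastNarrower);
  std::puts("ok");
  return 0;
}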