From: Simon Pilgrim Date: Thu, 23 Mar 2017 16:09:34 +0000 (+0000) Subject: [X86][SSE] Extract elements from narrower shuffle masks. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=de4fa9880719f50784a34b0065568428c574aa70;p=llvm [X86][SSE] Extract elements from narrower shuffle masks. Add support for widening narrow shuffle masks so we can directly extract from the relevant input vector of the shuffle. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298616 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f4685c14874..d7fe86c7730 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -29165,9 +29165,10 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, SDValue Src = N->getOperand(0); SDValue Idx = N->getOperand(1); + EVT VT = N->getValueType(0); EVT SrcVT = Src.getValueType(); EVT SrcSVT = SrcVT.getVectorElementType(); - EVT VT = N->getValueType(0); + unsigned NumSrcElts = SrcVT.getVectorNumElements(); // Don't attempt this for boolean mask vectors or unknown extraction indices. if (SrcSVT == MVT::i1 || !isa<ConstantSDNode>(Idx)) @@ -29179,21 +29180,27 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask)) return SDValue(); - // At the moment we can only narrow a shuffle mask to handle extractions - // of smaller scalars. - // TODO - investigate support for wider shuffle masks with known upper - // undef/zero elements for implicit zero-extension. - unsigned NumMaskElts = Mask.size(); - if ((SrcVT.getVectorNumElements() % NumMaskElts) != 0) - return SDValue(); - - int Scale = SrcVT.getVectorNumElements() / NumMaskElts; - if (Scale != 1) { - SmallVector<int, 16> ScaledMask; - scaleShuffleMask(Scale, Mask, ScaledMask); - Mask = ScaledMask; + // Attempt to narrow/widen the shuffle mask to the correct size. 
+ if (Mask.size() != NumSrcElts) { + if ((NumSrcElts % Mask.size()) == 0) { + SmallVector<int, 16> ScaledMask; + int Scale = NumSrcElts / Mask.size(); + scaleShuffleMask(Scale, Mask, ScaledMask); + Mask = std::move(ScaledMask); + } else if ((Mask.size() % NumSrcElts) == 0) { + SmallVector<int, 16> WidenedMask; + while (Mask.size() > NumSrcElts && + canWidenShuffleElements(Mask, WidenedMask)) + Mask = std::move(WidenedMask); + // TODO - investigate support for wider shuffle masks with known upper + // undef/zero elements for implicit zero-extension. + } } + // Check if narrowing/widening failed. + if (Mask.size() != NumSrcElts) + return SDValue(); + int SrcIdx = Mask[N->getConstantOperandVal(1)]; SDLoc dl(N); diff --git a/test/CodeGen/X86/known-signbits-vector.ll b/test/CodeGen/X86/known-signbits-vector.ll index 6922bf0afcb..f2b2d34f765 100644 --- a/test/CodeGen/X86/known-signbits-vector.ll +++ b/test/CodeGen/X86/known-signbits-vector.ll @@ -76,11 +76,8 @@ define float @signbits_ashr_extract_sitofp(<2 x i64> %a0) nounwind { ; X32-LABEL: signbits_ashr_extract_sitofp: ; X32: # BB#0: ; X32-NEXT: pushl %eax -; X32-NEXT: vpsrad $31, %xmm0, %xmm1 -; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] -; X32-NEXT: vmovd %xmm0, %eax -; X32-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0 +; X32-NEXT: vpextrd $1, %xmm0, %eax +; X32-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0 ; X32-NEXT: vmovss %xmm0, (%esp) ; X32-NEXT: flds (%esp) ; X32-NEXT: popl %eax