From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 14 Jan 2019 19:07:26 +0000 (+0000)
Subject: [X86][SSSE3] Bailout of lowerVectorShuffleAsPermuteAndUnpack for shuffle-with-zero...
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e7868c9338a247444da237d05f8310d061905cdb;p=llvm

[X86][SSSE3] Bailout of lowerVectorShuffleAsPermuteAndUnpack for shuffle-with-zero (PR40306)

If we have PSHUFB and we're shuffling with a zero vector, then we are better
off not doing VECTOR_SHUFFLE(UNPCK()) as we lose track of those zero elements.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@351103 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index e2e63eb4c18..032c2106543 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -11964,10 +11964,9 @@ static SDValue lowerVectorShuffleAsInsertPS(const SDLoc &DL, SDValue V1,
 /// because for floating point vectors we have a generalized SHUFPS lowering
 /// strategy that handles everything that doesn't *exactly* match an unpack,
 /// making this clever lowering unnecessary.
-static SDValue lowerVectorShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT,
-                                                    SDValue V1, SDValue V2,
-                                                    ArrayRef<int> Mask,
-                                                    SelectionDAG &DAG) {
+static SDValue lowerVectorShuffleAsPermuteAndUnpack(
+    const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
+    const X86Subtarget &Subtarget, SelectionDAG &DAG) {
   assert(!VT.isFloatingPoint() &&
          "This routine only supports integer vectors.");
   assert(VT.is128BitVector() &&
@@ -12036,6 +12035,13 @@ static SDValue lowerVectorShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT,
   if (SDValue Unpack = TryUnpack(ScalarSize, ScalarSize / OrigScalarSize))
     return Unpack;
 
+  // If we have PSHUFB, and we're shuffling with a zero vector then we're
+  // better off not doing VECTOR_SHUFFLE(UNPCK()) as we lose track of those
+  // zero elements.
+  if (Subtarget.hasSSSE3() && (ISD::isBuildVectorAllZeros(V1.getNode()) ||
+                               ISD::isBuildVectorAllZeros(V2.getNode())))
+    return SDValue();
+
   // If none of the unpack-rooted lowerings worked (or were profitable) try an
   // initial unpack.
   if (NumLoInputs == 0 || NumHiInputs == 0) {
@@ -12549,7 +12555,7 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
 
     // Try to lower by permuting the inputs into an unpack instruction.
     if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
-            DL, MVT::v4i32, V1, V2, Mask, DAG))
+            DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
       return Unpack;
   }
 
@@ -13245,8 +13251,8 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
     return BitBlend;
 
   // Try to lower by permuting the inputs into an unpack instruction.
-  if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(DL, MVT::v8i16, V1,
-                                                            V2, Mask, DAG))
+  if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
+          DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG))
     return Unpack;
 
   // If we can't directly blend but can use PSHUFB, that will be better as it
@@ -13534,7 +13540,7 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
     // shuffles will both be pshufb, in which case we shouldn't bother with
     // this.
     if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
-            DL, MVT::v16i8, V1, V2, Mask, DAG))
+            DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
       return Unpack;
 
     // If we have VBMI we can use one VPERM instead of multiple PSHUFBs.
diff --git a/test/CodeGen/X86/vector-shuffle-128-v8.ll b/test/CodeGen/X86/vector-shuffle-128-v8.ll
index 812946188ee..fd1098aa225 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v8.ll
@@ -2488,17 +2488,19 @@ define <8 x i16> @shuffle_v8i16_9zzzuuuu(<8 x i16> %x) {
 ;
 ; SSSE3-LABEL: shuffle_v8i16_9zzzuuuu:
 ; SSSE3:       # %bb.0:
-; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7]
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: shuffle_v8i16_9zzzuuuu:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7]
+; SSE41-NEXT:    psrld $16, %xmm0
+; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v8i16_9zzzuuuu:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7]
+; AVX-NEXT:    vpsrld $16, %xmm0, %xmm0
+; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; AVX-NEXT:    retq
   %r = shufflevector <8 x i16> zeroinitializer, <8 x i16> %x, <8 x i32> <i32 9, i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <8 x i16> %r