From: Simon Pilgrim
Date: Tue, 14 Feb 2017 16:22:04 +0000 (+0000)
Subject: [X86][SSE] Allow matchVectorShuffleWithUNPCK to recognise UNDEF inputs
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2fce16a04e36b219362b0022a944d829abeaafc2;p=llvm

[X86][SSE] Allow matchVectorShuffleWithUNPCK to recognise UNDEF inputs

Add support for specifying an UNPCK input as UNDEF

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@295061 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index f62befed91f..2288ce8941f 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -8224,25 +8224,37 @@ static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT,
 
 static bool matchVectorShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
                                         unsigned &UnpackOpcode, bool IsUnary,
-                                        ArrayRef<int> TargetMask) {
+                                        ArrayRef<int> TargetMask,
+                                        SelectionDAG &DAG) {
+  int NumElts = VT.getVectorNumElements();
+  int NumEltsInLane = 128 / VT.getScalarSizeInBits();
+
+  bool Undef1 = true, Undef2 = true;
+  for (int i = 0; (i != NumElts) && (Undef1 || Undef2); i += 2) {
+    Undef1 &= (SM_SentinelUndef == TargetMask[i + 0]);
+    Undef2 &= (SM_SentinelUndef == TargetMask[i + 1]);
+  }
+
   // Attempt to match the target mask against the unpack lo/hi mask patterns.
   SmallVector<int, 64> Unpckl, Unpckh;
   createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, IsUnary);
   if (isTargetShuffleEquivalent(TargetMask, Unpckl)) {
     UnpackOpcode = X86ISD::UNPCKL;
-    V2 = IsUnary ? V1 : V2;
+    V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
+    V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
     return true;
   }
 
   createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, IsUnary);
   if (isTargetShuffleEquivalent(TargetMask, Unpckh)) {
     UnpackOpcode = X86ISD::UNPCKH;
-    V2 = IsUnary ? V1 : V2;
+    V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
+    V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
     return true;
   }
 
-  // If a binary shuffle, commute and try again.
   if (!IsUnary) {
+    // If a binary shuffle, commute and try again.
     ShuffleVectorSDNode::commuteMask(Unpckl);
     if (isTargetShuffleEquivalent(TargetMask, Unpckl)) {
       UnpackOpcode = X86ISD::UNPCKL;
@@ -26571,6 +26583,7 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
 // TODO: Investigate sharing more of this with shuffle lowering.
 static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
                                      bool FloatDomain, SDValue &V1, SDValue &V2,
+                                     SelectionDAG &DAG,
                                      const X86Subtarget &Subtarget,
                                      unsigned &Shuffle, MVT &ShuffleVT,
                                      bool IsUnary) {
@@ -26610,7 +26623,8 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
       (MaskVT.is256BitVector() && 32 <= EltSizeInBits && Subtarget.hasAVX()) ||
       (MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
       (MaskVT.is512BitVector() && Subtarget.hasAVX512())) {
-    if (matchVectorShuffleWithUNPCK(MaskVT, V1, V2, Shuffle, IsUnary, Mask)) {
+    if (matchVectorShuffleWithUNPCK(MaskVT, V1, V2, Shuffle, IsUnary, Mask,
+                                    DAG)) {
       ShuffleVT = MaskVT;
       if (ShuffleVT.is256BitVector() && !Subtarget.hasAVX2())
         ShuffleVT = (32 == EltSizeInBits ? MVT::v8f32 : MVT::v4f64);
@@ -26941,8 +26955,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
     }
   }
 
-  if (matchBinaryVectorShuffle(MaskVT, Mask, FloatDomain, V1, V2, Subtarget,
-                               Shuffle, ShuffleVT, UnaryShuffle)) {
+  if (matchBinaryVectorShuffle(MaskVT, Mask, FloatDomain, V1, V2, DAG,
+                               Subtarget, Shuffle, ShuffleVT, UnaryShuffle)) {
     if (Depth == 1 && Root.getOpcode() == Shuffle)
       return false; // Nothing to do!
     if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
diff --git a/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
index 77864904261..bebf2bae245 100644
--- a/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -665,12 +665,10 @@ define <32 x i8> @combine_pshufb_not_as_pshufw(<32 x i8> %a0) {
 define <32 x i8> @combine_pshufb_as_unpacklo_undef(<32 x i8> %a0) {
 ; X32-LABEL: combine_pshufb_as_unpacklo_undef:
 ; X32:       # BB#0:
-; X32-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: combine_pshufb_as_unpacklo_undef:
 ; X64:       # BB#0:
-; X64-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
 ; X64-NEXT:    retq
   %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> )
   %2 = shufflevector <32 x i8> %1, <32 x i8> undef, <32 x i32> 
diff --git a/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll b/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
index 94d9f81214b..8d9fb07a66b 100644
--- a/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
@@ -474,15 +474,9 @@ define <16 x i8> @combine_pshufb_as_unary_unpckhwd(<16 x i8> %a0) {
 }
 
 define <8 x i16> @combine_pshufb_as_unpacklo_undef(<16 x i8> %a0) {
-; SSE-LABEL: combine_pshufb_as_unpacklo_undef:
-; SSE:       # BB#0:
-; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE-NEXT:    retq
-;
-; AVX-LABEL: combine_pshufb_as_unpacklo_undef:
-; AVX:       # BB#0:
-; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; AVX-NEXT:    retq
+; ALL-LABEL: combine_pshufb_as_unpacklo_undef:
+; ALL:       # BB#0:
+; ALL-NEXT:    retq
   %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> )
   %2 = bitcast <16 x i8> %1 to <8 x i16>
   %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> 
@@ -490,15 +484,9 @@ define <8 x i16> @combine_pshufb_as_unpacklo_undef(<16 x i8> %a0) {
 }
 
 define <16 x i8> @combine_pshufb_as_unpackhi_undef(<16 x i8> %a0) {
-; SSE-LABEL: combine_pshufb_as_unpackhi_undef:
-; SSE:       # BB#0:
-; SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSE-NEXT:    retq
-;
-; AVX-LABEL: combine_pshufb_as_unpackhi_undef:
-; AVX:       # BB#0:
-; AVX-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX-NEXT:    retq
+; ALL-LABEL: combine_pshufb_as_unpackhi_undef:
+; ALL:       # BB#0:
+; ALL-NEXT:    retq
   %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> )
   %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> 
   ret <16 x i8> %2
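
The idea behind the patch: unpack (lo/hi) target masks interleave their two inputs, so even mask positions read operand 0 and odd positions read operand 1. If every position taken from one operand is the undef sentinel, that operand is never used and can be supplied as UNDEF, which gives later combines the freedom to simplify or drop the unpack entirely, as the updated tests above show. Below is a minimal standalone sketch of that even/odd scan in plain C++, not LLVM code: std::vector stands in for ArrayRef, the helper name scanUnpackOperandsForUndef is hypothetical, and SM_SentinelUndef is redefined locally with LLVM's -1 sentinel value.

#include <cstddef>
#include <cstdio>
#include <vector>

// Mirrors LLVM's "don't care" sentinel for target shuffle mask elements.
constexpr int SM_SentinelUndef = -1;

// Even mask positions read operand 0, odd positions read operand 1. If every
// position taken from one operand is undef, that operand is never used and
// can be replaced with an UNDEF node. (Hypothetical helper; the patch
// performs this scan inline in matchVectorShuffleWithUNPCK.)
static void scanUnpackOperandsForUndef(const std::vector<int> &TargetMask,
                                       bool &Undef1, bool &Undef2) {
  Undef1 = Undef2 = true;
  for (std::size_t i = 0; i + 1 < TargetMask.size() && (Undef1 || Undef2);
       i += 2) {
    Undef1 &= (TargetMask[i + 0] == SM_SentinelUndef);
    Undef2 &= (TargetMask[i + 1] == SM_SentinelUndef);
  }
}

int main() {
  // An unpacklo-style v8i16 mask in which every odd lane is undef: only
  // operand 0 is ever referenced, so operand 1 can become UNDEF.
  std::vector<int> Mask = {0, SM_SentinelUndef, 1, SM_SentinelUndef,
                           2, SM_SentinelUndef, 3, SM_SentinelUndef};
  bool Undef1 = false, Undef2 = false;
  scanUnpackOperandsForUndef(Mask, Undef1, Undef2);
  std::printf("operand0 unused: %d, operand1 unused: %d\n", Undef1, Undef2);
  return 0;
}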