From 468b64ac6c5a04d0616b52a7e5d0e3053a773eee Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 27 Sep 2017 18:56:20 +0000 Subject: [PATCH] [X86] Rewrite the zero vector checks in lowerV2X128VectorShuffle to use the Zeroable APInt We already have zeroable bits in an APInt. We might as well use that instead of checking for an all zero BUILD_VECTOR. Differential Revision: https://reviews.llvm.org/D37950 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@314332 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 33 +++++++++--------------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1cd3af98dc9..02ce5eb34f2 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -12085,12 +12085,12 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, Zeroable, Subtarget, DAG)) return Blend; - bool IsV1Zero = ISD::isBuildVectorAllZeros(V1.getNode()); - bool IsV2Zero = ISD::isBuildVectorAllZeros(V2.getNode()); + bool IsLowZero = (Zeroable & 0x3) == 0x3; + bool IsHighZero = (Zeroable & 0xc) == 0xc; // If either input operand is a zero vector, use VPERM2X128 because its mask // allows us to replace the zero input with an implicit zero. - if (!IsV1Zero && !IsV2Zero) { + if (!IsLowZero && !IsHighZero) { // Check for patterns which can be matched with a single insert of a 128-bit // subvector. bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1}); @@ -12124,30 +12124,17 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, // [6] - ignore // [7] - zero high half of destination - int MaskLO = WidenedMask[0] < 0 ? 0 : WidenedMask[0]; - int MaskHI = WidenedMask[1] < 0 ? 0 : WidenedMask[1]; + assert(WidenedMask[0] >= 0 && WidenedMask[1] >= 0 && "Undef half?"); - unsigned PermMask = MaskLO | (MaskHI << 4); + unsigned PermMask = 0; + PermMask |= IsLowZero ? 0x08 : (WidenedMask[0] << 0); + PermMask |= IsHighZero ? 0x80 : (WidenedMask[1] << 4); - // If either input is a zero vector, replace it with an undef input. - // Shuffle mask values < 4 are selecting elements of V1. - // Shuffle mask values >= 4 are selecting elements of V2. - // Adjust each half of the permute mask by clearing the half that was - // selecting the zero vector and setting the zero mask bit. - if (IsV1Zero) { + // Check the immediate mask and replace unused sources with undef. + if ((PermMask & 0x0a) != 0x00 && (PermMask & 0xa0) != 0x00) V1 = DAG.getUNDEF(VT); - if (MaskLO < 2) - PermMask = (PermMask & 0xf0) | 0x08; - if (MaskHI < 2) - PermMask = (PermMask & 0x0f) | 0x80; - } - if (IsV2Zero) { + if ((PermMask & 0x0a) != 0x02 && (PermMask & 0xa0) != 0x20) V2 = DAG.getUNDEF(VT); - if (MaskLO >= 2) - PermMask = (PermMask & 0xf0) | 0x08; - if (MaskHI >= 2) - PermMask = (PermMask & 0x0f) | 0x80; - } return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2, DAG.getConstant(PermMask, DL, MVT::i8)); -- 2.40.0