From: Simon Pilgrim
Date: Tue, 28 Mar 2017 13:05:48 +0000 (+0000)
Subject: [X86][SSE] Begin merging vector shuffle to BLEND for lowering and combining.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3d39cb0a48a151781a0abcc64de2f8502e0754a1;p=llvm

[X86][SSE] Begin merging vector shuffle to BLEND for lowering and combining.

Split off matchVectorShuffleAsBlend from lowerVectorShuffleAsBlend for reuse in combining.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298914 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 74a8fb189ac..93da540cd6e 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -8207,6 +8207,24 @@ static bool isTargetShuffleEquivalent(ArrayRef Mask,
   return true;
 }
 
+// Merges a general DAG shuffle mask and zeroable bit mask into a target shuffle
+// mask.
+static SmallVector createTargetShuffleMask(ArrayRef Mask,
+                                           const APInt &Zeroable) {
+  int NumElts = Mask.size();
+  assert(NumElts == Zeroable.getBitWidth() && "Mismatch mask sizes");
+
+  SmallVector TargetMask(NumElts, SM_SentinelUndef);
+  for (unsigned i = 0; i != NumElts; ++i) {
+    int M = Mask[i];
+    if (M == SM_SentinelUndef)
+      continue;
+    assert(0 <= M && M < (2 * NumElts) && "Out of range shuffle index");
+    TargetMask[i] = (Zeroable[i] ? SM_SentinelZero : M);
+  }
+  return TargetMask;
+}
+
 // Check if the shuffle mask is suitable for the AVX vpunpcklwd or vpunpckhwd
 // instructions.
 static bool isUnpackWdShuffleMask(ArrayRef Mask, MVT VT) {
@@ -8626,29 +8644,25 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
                                     const X86Subtarget &Subtarget,
                                     SelectionDAG &DAG);
 
-/// \brief Try to emit a blend instruction for a shuffle.
-///
-/// This doesn't do any checks for the availability of instructions for blending
-/// these values. It relies on the availability of the X86ISD::BLENDI pattern to
-/// be matched in the backend with the type given. What it does check for is
-/// that the shuffle mask is a blend, or convertible into a blend with zero.
-static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
-                                         SDValue V2, ArrayRef Original,
-                                         const APInt &Zeroable,
-                                         const X86Subtarget &Subtarget,
-                                         SelectionDAG &DAG) {
-  bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
-  bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
-  SmallVector Mask(Original.begin(), Original.end());
-  bool ForceV1Zero = false, ForceV2Zero = false;
+static bool matchVectorShuffleAsBlend(SDValue V1, SDValue V2,
+                                      MutableArrayRef TargetMask,
+                                      bool &ForceV1Zero, bool &ForceV2Zero,
+                                      uint64_t &BlendMask) {
+  bool V1IsZeroOrUndef =
+      V1.isUndef() || ISD::isBuildVectorAllZeros(V1.getNode());
+  bool V2IsZeroOrUndef =
+      V2.isUndef() || ISD::isBuildVectorAllZeros(V2.getNode());
+
+  BlendMask = 0;
+  ForceV1Zero = false, ForceV2Zero = false;
+  assert(TargetMask.size() <= 64 && "Shuffle mask too big for blend mask");
 
   // Attempt to generate the binary blend mask. If an input is zero then
   // we can use any lane.
   // TODO: generalize the zero matching to any scalar like isShuffleEquivalent.
-  uint64_t BlendMask = 0;
-  for (int i = 0, Size = Mask.size(); i < Size; ++i) {
-    int M = Mask[i];
-    if (M < 0)
+  for (int i = 0, Size = TargetMask.size(); i < Size; ++i) {
+    int M = TargetMask[i];
+    if (M == SM_SentinelUndef)
       continue;
     if (M == i)
       continue;
@@ -8656,21 +8670,50 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
       BlendMask |= 1ull << i;
       continue;
     }
-    if (Zeroable[i]) {
-      if (V1IsZero) {
+    if (M == SM_SentinelZero) {
+      if (V1IsZeroOrUndef) {
         ForceV1Zero = true;
-        Mask[i] = i;
+        TargetMask[i] = i;
         continue;
       }
-      if (V2IsZero) {
+      if (V2IsZeroOrUndef) {
         ForceV2Zero = true;
         BlendMask |= 1ull << i;
-        Mask[i] = i + Size;
+        TargetMask[i] = i + Size;
         continue;
       }
     }
-    return SDValue(); // Shuffled input!
+    return false;
   }
+  return true;
+}
+
+uint64_t scaleVectorShuffleBlendMask(uint64_t BlendMask, int Size, int Scale) {
+  uint64_t ScaledMask = 0;
+  for (int i = 0; i != Size; ++i)
+    if (BlendMask & (1ull << i))
+      ScaledMask |= ((1ull << Scale) - 1) << (i * Scale);
+  return ScaledMask;
+};
+
+/// \brief Try to emit a blend instruction for a shuffle.
+///
+/// This doesn't do any checks for the availability of instructions for blending
+/// these values. It relies on the availability of the X86ISD::BLENDI pattern to
+/// be matched in the backend with the type given. What it does check for is
+/// that the shuffle mask is a blend, or convertible into a blend with zero.
+static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
+                                         SDValue V2, ArrayRef Original,
+                                         const APInt &Zeroable,
+                                         const X86Subtarget &Subtarget,
+                                         SelectionDAG &DAG) {
+  SmallVector Mask = createTargetShuffleMask(Original, Zeroable);
+
+  uint64_t BlendMask = 0;
+  bool ForceV1Zero = false, ForceV2Zero = false;
+  if (!matchVectorShuffleAsBlend(V1, V2, Mask, ForceV1Zero, ForceV2Zero,
+                                 BlendMask))
+    return SDValue();
 
   // Create a REAL zero vector - ISD::isBuildVectorAllZeros allows UNDEFs.
   if (ForceV1Zero)
@@ -8678,14 +8721,6 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
   if (ForceV2Zero)
     V2 = getZeroVector(VT, Subtarget, DAG, DL);
 
-  auto ScaleBlendMask = [](uint64_t BlendMask, int Size, int Scale) {
-    uint64_t ScaledMask = 0;
-    for (int i = 0; i != Size; ++i)
-      if (BlendMask & (1ull << i))
-        ScaledMask |= ((1ull << Scale) - 1) << (i * Scale);
-    return ScaledMask;
-  };
-
   switch (VT.SimpleTy) {
   case MVT::v2f64:
   case MVT::v4f32:
@@ -8705,7 +8740,7 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
     if (Subtarget.hasAVX2()) {
       // Scale the blend by the number of 32-bit dwords per element.
       int Scale = VT.getScalarSizeInBits() / 32;
-      BlendMask = ScaleBlendMask(BlendMask, Mask.size(), Scale);
+      BlendMask = scaleVectorShuffleBlendMask(BlendMask, Mask.size(), Scale);
       MVT BlendVT = VT.getSizeInBits() > 128 ? MVT::v8i32 : MVT::v4i32;
       V1 = DAG.getBitcast(BlendVT, V1);
       V2 = DAG.getBitcast(BlendVT, V2);
@@ -8718,7 +8753,7 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
     // For integer shuffles we need to expand the mask and cast the inputs to
     // v8i16s prior to blending.
     int Scale = 8 / VT.getVectorNumElements();
-    BlendMask = ScaleBlendMask(BlendMask, Mask.size(), Scale);
+    BlendMask = scaleVectorShuffleBlendMask(BlendMask, Mask.size(), Scale);
     V1 = DAG.getBitcast(MVT::v8i16, V1);
     V2 = DAG.getBitcast(MVT::v8i16, V2);
     return DAG.getBitcast(VT,
@@ -27124,44 +27159,21 @@ static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask,
         BlendVT = MVT::v8f32;
       }
 
-      unsigned BlendSize = BlendVT.getVectorNumElements();
-      unsigned MaskRatio = BlendSize / NumMaskElts;
-
-      // Can we blend with zero?
-      if (isSequentialOrUndefOrZeroInRange(Mask, /*Pos*/ 0, /*Size*/ NumMaskElts,
-                                           /*Low*/ 0) &&
-          NumMaskElts <= BlendVT.getVectorNumElements()) {
-        PermuteImm = 0;
-        for (unsigned i = 0; i != BlendSize; ++i)
-          if (Mask[i / MaskRatio] < 0)
-            PermuteImm |= 1u << i;
-
-        V2 = getZeroVector(BlendVT, Subtarget, DAG, DL);
-        Shuffle = X86ISD::BLENDI;
-        ShuffleVT = BlendVT;
-        return true;
-      }
-
-      // Attempt to match as a binary blend.
       if (NumMaskElts <= BlendVT.getVectorNumElements()) {
-        bool MatchBlend = true;
-        for (int i = 0; i != (int)NumMaskElts; ++i) {
-          int M = Mask[i];
-          if (M == SM_SentinelUndef)
-            continue;
-          if ((M == SM_SentinelZero) ||
-              ((M != i) && (M != (i + (int)NumMaskElts)))) {
-            MatchBlend = false;
-            break;
+        uint64_t BlendMask = 0;
+        bool ForceV1Zero = false, ForceV2Zero = false;
+        SmallVector TargetMask(Mask.begin(), Mask.end());
+        if (matchVectorShuffleAsBlend(V1, V2, TargetMask, ForceV1Zero,
+                                      ForceV2Zero, BlendMask)) {
+          if (NumMaskElts < BlendVT.getVectorNumElements()) {
+            int Scale = BlendVT.getVectorNumElements() / NumMaskElts;
+            BlendMask =
+                scaleVectorShuffleBlendMask(BlendMask, NumMaskElts, Scale);
           }
-        }
-
-        if (MatchBlend) {
-          PermuteImm = 0;
-          for (unsigned i = 0; i != BlendSize; ++i)
-            if ((int)NumMaskElts <= Mask[i / MaskRatio])
-              PermuteImm |= 1u << i;
+          V1 = ForceV1Zero ? getZeroVector(BlendVT, Subtarget, DAG, DL) : V1;
+          V2 = ForceV2Zero ? getZeroVector(BlendVT, Subtarget, DAG, DL) : V2;
+          PermuteImm = (unsigned)BlendMask;
           Shuffle = X86ISD::BLENDI;
           ShuffleVT = BlendVT;
           return true;