From: Simon Pilgrim Date: Mon, 11 Jul 2016 12:49:35 +0000 (+0000) Subject: [X86][SSE] Generalise target shuffle combine of shuffles using variable masks X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a1057f06ca3aad5f9bf6857f27820b28bf512739;p=llvm [X86][SSE] Generalise target shuffle combine of shuffles using variable masks At present the only shuffle with a variable mask we recognise is PSHUFB, which influences whether it's worth the cost of mask creation/loading of a combined target shuffle with a variable mask. This change sets up the infrastructure to support other shuffles in the future but has no effect yet. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275059 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index c2887706b6d..975e61d571e 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3825,6 +3825,14 @@ static bool isTargetShuffle(unsigned Opcode) { } } +static bool isTargetShuffleVariableMask(unsigned Opcode) { + switch (Opcode) { + default: return false; + case X86ISD::PSHUFB: + return true; + } +} + static SDValue getTargetShuffleNode(unsigned Opc, const SDLoc &dl, MVT VT, SDValue V1, unsigned TargetMask, SelectionDAG &DAG) { @@ -25009,7 +25017,7 @@ static bool matchBinaryVectorShuffle(MVT SrcVT, ArrayRef<int> Mask, /// instruction but should only be used to replace chains over a certain depth. 
static bool combineX86ShuffleChain(SDValue Input, SDValue Root, ArrayRef<int> Mask, int Depth, - bool HasPSHUFB, SelectionDAG &DAG, + bool HasVariableMask, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { assert(!Mask.empty() && "Cannot combine an empty shuffle mask!"); @@ -25175,11 +25183,12 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root, if (Depth < 2) return false; - // If we have 3 or more shuffle instructions or a chain involving PSHUFB, we - // can replace them with a single PSHUFB instruction profitably. Intel's - // manuals suggest only using PSHUFB if doing so replacing 5 instructions, but - // in practice PSHUFB tends to be *very* fast so we're more aggressive. - if ((Depth >= 3 || HasPSHUFB) && + // If we have 3 or more shuffle instructions or a chain involving a variable + // mask, we can replace them with a single PSHUFB instruction profitably. + // Intel's manuals suggest only using PSHUFB if doing so replacing 5 + // instructions, but in practice PSHUFB tends to be *very* fast so we're + // more aggressive. + if ((Depth >= 3 || HasVariableMask) && ((VT.is128BitVector() && Subtarget.hasSSSE3()) || (VT.is256BitVector() && Subtarget.hasAVX2()) || (VT.is512BitVector() && Subtarget.hasBWI()))) { @@ -25249,7 +25258,7 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root, /// combining in this recursive walk. static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root, ArrayRef<int> RootMask, - int Depth, bool HasPSHUFB, + int Depth, bool HasVariableMask, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { @@ -25351,13 +25360,12 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root, assert(Input0 && "Shuffle with no inputs detected"); - // TODO - generalize this to support any variable mask shuffle. 
- HasPSHUFB |= (Op.getOpcode() == X86ISD::PSHUFB); + HasVariableMask |= isTargetShuffleVariableMask(Op.getOpcode()); // See if we can recurse into Input0 (if it's a target shuffle). if (Op->isOnlyUserOf(Input0.getNode()) && - combineX86ShufflesRecursively(Input0, Root, Mask, Depth + 1, HasPSHUFB, - DAG, DCI, Subtarget)) + combineX86ShufflesRecursively(Input0, Root, Mask, Depth + 1, + HasVariableMask, DAG, DCI, Subtarget)) return true; // Minor canonicalization of the accumulated shuffle mask to make it easier @@ -25370,8 +25378,8 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root, Mask = std::move(WidenedMask); } - return combineX86ShuffleChain(Input0, Root, Mask, Depth, HasPSHUFB, DAG, DCI, - Subtarget); + return combineX86ShuffleChain(Input0, Root, Mask, Depth, HasVariableMask, DAG, + DCI, Subtarget); } /// \brief Get the PSHUF-style mask from PSHUF node.