From: Simon Pilgrim
Date: Sun, 10 Sep 2017 14:06:41 +0000 (+0000)
Subject: [X86][SSE] Move combineTo call out of combineX86ShuffleChain. NFCI.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=45694861ece2410d899d3dff559a8dc76eae7631;p=llvm

[X86][SSE] Move combineTo call out of combineX86ShuffleChain. NFCI.

First step towards making it possible to use the shuffle combines for cases where we don't want to call DCI.CombineTo() with the result.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@312884 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ed8b3890360..c94fc5ed44b 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -27611,11 +27611,11 @@ static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask,
 /// into either a single instruction if there is a special purpose instruction
 /// for this operation, or into a PSHUFB instruction which is a fully general
 /// instruction but should only be used to replace chains over a certain depth.
-static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, - ArrayRef BaseMask, int Depth, - bool HasVariableMask, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget &Subtarget) { +static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, + ArrayRef BaseMask, int Depth, + bool HasVariableMask, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { assert(!BaseMask.empty() && "Cannot combine an empty shuffle mask!"); assert((Inputs.size() == 1 || Inputs.size() == 2) && "Unexpected number of shuffle inputs!"); @@ -27640,9 +27640,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, unsigned NumBaseMaskElts = BaseMask.size(); if (NumBaseMaskElts == 1) { assert(BaseMask[0] == 0 && "Invalid shuffle index found!"); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, V1), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, V1); } unsigned RootSizeInBits = RootVT.getSizeInBits(); @@ -27660,7 +27658,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, bool IsEVEXShuffle = RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128); if (IsEVEXShuffle && (RootVT.getScalarSizeInBits() != BaseMaskEltSizeInBits)) - return false; + return SDValue(); // TODO - handle 128/256-bit lane shuffles of 512-bit vectors. @@ -27669,7 +27667,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, if (UnaryShuffle && RootVT.is256BitVector() && NumBaseMaskElts == 2 && !isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0)) { if (Depth == 1 && Root.getOpcode() == X86ISD::VPERM2X128) - return false; // Nothing to do! + return SDValue(); // Nothing to do! MVT ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64); unsigned PermMask = 0; PermMask |= ((BaseMask[0] < 0 ? 
0x8 : (BaseMask[0] & 1)) << 0); @@ -27681,9 +27679,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, DAG.getUNDEF(ShuffleVT), DAG.getConstant(PermMask, DL, MVT::i8)); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } // For masks that have been widened to 128-bit elements or more, @@ -27708,7 +27704,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, // Only allow legal mask types. if (!DAG.getTargetLoweringInfo().isTypeLegal(MaskVT)) - return false; + return SDValue(); // Attempt to match the mask against known shuffle patterns. MVT ShuffleSrcVT, ShuffleVT; @@ -27736,9 +27732,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, ArrayRef HiMask(Mask.data() + Scale, NumMaskElts - Scale); if (isSequentialOrUndefInRange(Mask, 0, Scale, 0) && isUndefOrZeroOrInRange(HiMask, Scale, NumMaskElts)) { - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, V1), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, V1); } } @@ -27746,33 +27740,29 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, V1, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT)) { if (Depth == 1 && Root.getOpcode() == Shuffle) - return false; // Nothing to do! + return SDValue(); // Nothing to do! if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements())) - return false; // AVX512 Writemask clash. + return SDValue(); // AVX512 Writemask clash. 
Res = DAG.getBitcast(ShuffleSrcVT, V1); DCI.AddToWorklist(Res.getNode()); Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, Subtarget, Shuffle, ShuffleVT, PermuteImm)) { if (Depth == 1 && Root.getOpcode() == Shuffle) - return false; // Nothing to do! + return SDValue(); // Nothing to do! if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements())) - return false; // AVX512 Writemask clash. + return SDValue(); // AVX512 Writemask clash. Res = DAG.getBitcast(ShuffleVT, V1); DCI.AddToWorklist(Res.getNode()); Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res, DAG.getConstant(PermuteImm, DL, MVT::i8)); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } } @@ -27780,18 +27770,16 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, V1, V2, DL, DAG, Subtarget, Shuffle, ShuffleVT, UnaryShuffle)) { if (Depth == 1 && Root.getOpcode() == Shuffle) - return false; // Nothing to do! + return SDValue(); // Nothing to do! if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements())) - return false; // AVX512 Writemask clash. + return SDValue(); // AVX512 Writemask clash. 
V1 = DAG.getBitcast(ShuffleVT, V1); DCI.AddToWorklist(V1.getNode()); V2 = DAG.getBitcast(ShuffleVT, V2); DCI.AddToWorklist(V2.getNode()); Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, @@ -27799,9 +27787,9 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, Subtarget, Shuffle, ShuffleVT, PermuteImm)) { if (Depth == 1 && Root.getOpcode() == Shuffle) - return false; // Nothing to do! + return SDValue(); // Nothing to do! if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements())) - return false; // AVX512 Writemask clash. + return SDValue(); // AVX512 Writemask clash. V1 = DAG.getBitcast(ShuffleVT, V1); DCI.AddToWorklist(V1.getNode()); V2 = DAG.getBitcast(ShuffleVT, V2); @@ -27809,9 +27797,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2, DAG.getConstant(PermuteImm, DL, MVT::i8)); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } // Typically from here on, we need an integer version of MaskVT. @@ -27824,21 +27810,19 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, if (matchVectorShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx, Zeroable)) { if (Depth == 1 && Root.getOpcode() == X86ISD::EXTRQI) - return false; // Nothing to do! + return SDValue(); // Nothing to do! 
V1 = DAG.getBitcast(IntMaskVT, V1); DCI.AddToWorklist(V1.getNode()); Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1, DAG.getConstant(BitLen, DL, MVT::i8), DAG.getConstant(BitIdx, DL, MVT::i8)); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } if (matchVectorShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) { if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI) - return false; // Nothing to do! + return SDValue(); // Nothing to do! V1 = DAG.getBitcast(IntMaskVT, V1); DCI.AddToWorklist(V1.getNode()); V2 = DAG.getBitcast(IntMaskVT, V2); @@ -27847,16 +27831,14 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, DAG.getConstant(BitLen, DL, MVT::i8), DAG.getConstant(BitIdx, DL, MVT::i8)); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } } // Don't try to re-form single instruction chains under any circumstances now // that we've done encoding canonicalization for them. 
if (Depth < 2) - return false; + return SDValue(); bool MaskContainsZeros = any_of(Mask, [](int M) { return M == SM_SentinelZero; }); @@ -27879,9 +27861,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, DCI.AddToWorklist(Res.getNode()); Res = DAG.getNode(X86ISD::VPERMV, DL, MaskVT, VPermMask, Res); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } // Lower a unary+zero lane-crossing shuffle as VPERMV3 with a zero @@ -27910,9 +27890,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, DCI.AddToWorklist(Zero.getNode()); Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, Res, VPermMask, Zero); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } // If we have a dual input lane-crossing shuffle then lower to VPERMV3. @@ -27935,11 +27913,9 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, DCI.AddToWorklist(V2.getNode()); Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, V1, VPermMask, V2); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } - return false; + return SDValue(); } // See if we can combine a single input shuffle with zeros to a bit-mask, @@ -27969,9 +27945,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, FloatDomain ? 
unsigned(X86ISD::FAND) : unsigned(ISD::AND); Res = DAG.getNode(AndOpcode, DL, MaskVT, Res, BitMask); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } // If we have a single input shuffle with different shuffle patterns in the @@ -27992,9 +27966,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, DCI.AddToWorklist(Res.getNode()); Res = DAG.getNode(X86ISD::VPERMILPV, DL, MaskVT, Res, VPermMask); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } // With XOP, binary shuffles of 128/256-bit floating point vectors can combine @@ -28033,9 +28005,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp, DAG.getConstant(M2ZImm, DL, MVT::i8)); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } // If we have 3 or more shuffle instructions or a chain involving a variable @@ -28071,9 +28041,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, DCI.AddToWorklist(PSHUFBMaskOp.getNode()); Res = DAG.getNode(X86ISD::PSHUFB, DL, ByteVT, Res, PSHUFBMaskOp); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } // With XOP, if we have a 128-bit binary input shuffle we can always combine @@ -28109,13 +28077,11 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, DCI.AddToWorklist(VPPERMMaskOp.getNode()); Res = DAG.getNode(X86ISD::VPPERM, DL, ByteVT, V1, V2, VPPERMMaskOp); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - 
return true; + return DAG.getBitcast(RootVT, Res); } // Failed to find any combines. - return false; + return SDValue(); } // Attempt to constant fold all of the constant source ops. @@ -28434,8 +28400,14 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps, std::swap(Ops[0], Ops[1]); } - return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask, DAG, - DCI, Subtarget); + // Finally, try to combine into a single shuffle instruction. + SDValue Res = combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask, DAG, + DCI, Subtarget); + if (Res) { + DCI.CombineTo(Root.getNode(), Res, /*AddTo*/ true); + return true; + } + return false; } /// \brief Get the PSHUF-style mask from PSHUF node.