/// into either a single instruction if there is a special purpose instruction
/// for this operation, or into a PSHUFB instruction which is a fully general
/// instruction but should only be used to replace chains over a certain depth.
-static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
- ArrayRef<int> BaseMask, int Depth,
- bool HasVariableMask, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
+static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
+ ArrayRef<int> BaseMask, int Depth,
+ bool HasVariableMask, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
assert(!BaseMask.empty() && "Cannot combine an empty shuffle mask!");
assert((Inputs.size() == 1 || Inputs.size() == 2) &&
"Unexpected number of shuffle inputs!");
unsigned NumBaseMaskElts = BaseMask.size();
if (NumBaseMaskElts == 1) {
assert(BaseMask[0] == 0 && "Invalid shuffle index found!");
- DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, V1),
- /*AddTo*/ true);
- return true;
+ return DAG.getBitcast(RootVT, V1);
}
unsigned RootSizeInBits = RootVT.getSizeInBits();
bool IsEVEXShuffle =
RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128);
if (IsEVEXShuffle && (RootVT.getScalarSizeInBits() != BaseMaskEltSizeInBits))
- return false;
+ return SDValue();
// TODO - handle 128/256-bit lane shuffles of 512-bit vectors.
if (UnaryShuffle && RootVT.is256BitVector() && NumBaseMaskElts == 2 &&
!isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0)) {
if (Depth == 1 && Root.getOpcode() == X86ISD::VPERM2X128)
- return false; // Nothing to do!
+ return SDValue(); // Nothing to do!
MVT ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64);
unsigned PermMask = 0;
PermMask |= ((BaseMask[0] < 0 ? 0x8 : (BaseMask[0] & 1)) << 0);
DAG.getUNDEF(ShuffleVT),
DAG.getConstant(PermMask, DL, MVT::i8));
DCI.AddToWorklist(Res.getNode());
- DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
- /*AddTo*/ true);
- return true;
+ return DAG.getBitcast(RootVT, Res);
}
// For masks that have been widened to 128-bit elements or more,
// Only allow legal mask types.
if (!DAG.getTargetLoweringInfo().isTypeLegal(MaskVT))
- return false;
+ return SDValue();
// Attempt to match the mask against known shuffle patterns.
MVT ShuffleSrcVT, ShuffleVT;
ArrayRef<int> HiMask(Mask.data() + Scale, NumMaskElts - Scale);
if (isSequentialOrUndefInRange(Mask, 0, Scale, 0) &&
isUndefOrZeroOrInRange(HiMask, Scale, NumMaskElts)) {
- DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, V1),
- /*AddTo*/ true);
- return true;
+ return DAG.getBitcast(RootVT, V1);
}
}
V1, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
ShuffleVT)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
- return false; // Nothing to do!
+ return SDValue(); // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
- return false; // AVX512 Writemask clash.
+ return SDValue(); // AVX512 Writemask clash.
Res = DAG.getBitcast(ShuffleSrcVT, V1);
DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
DCI.AddToWorklist(Res.getNode());
- DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
- /*AddTo*/ true);
- return true;
+ return DAG.getBitcast(RootVT, Res);
}
if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
AllowIntDomain, Subtarget, Shuffle,
ShuffleVT, PermuteImm)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
- return false; // Nothing to do!
+ return SDValue(); // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
- return false; // AVX512 Writemask clash.
+ return SDValue(); // AVX512 Writemask clash.
Res = DAG.getBitcast(ShuffleVT, V1);
DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res,
DAG.getConstant(PermuteImm, DL, MVT::i8));
DCI.AddToWorklist(Res.getNode());
- DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
- /*AddTo*/ true);
- return true;
+ return DAG.getBitcast(RootVT, Res);
}
}
V1, V2, DL, DAG, Subtarget, Shuffle, ShuffleVT,
UnaryShuffle)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
- return false; // Nothing to do!
+ return SDValue(); // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
- return false; // AVX512 Writemask clash.
+ return SDValue(); // AVX512 Writemask clash.
V1 = DAG.getBitcast(ShuffleVT, V1);
DCI.AddToWorklist(V1.getNode());
V2 = DAG.getBitcast(ShuffleVT, V2);
DCI.AddToWorklist(V2.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2);
DCI.AddToWorklist(Res.getNode());
- DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
- /*AddTo*/ true);
- return true;
+ return DAG.getBitcast(RootVT, Res);
}
if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
Subtarget, Shuffle, ShuffleVT,
PermuteImm)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
- return false; // Nothing to do!
+ return SDValue(); // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
- return false; // AVX512 Writemask clash.
+ return SDValue(); // AVX512 Writemask clash.
V1 = DAG.getBitcast(ShuffleVT, V1);
DCI.AddToWorklist(V1.getNode());
V2 = DAG.getBitcast(ShuffleVT, V2);
Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2,
DAG.getConstant(PermuteImm, DL, MVT::i8));
DCI.AddToWorklist(Res.getNode());
- DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
- /*AddTo*/ true);
- return true;
+ return DAG.getBitcast(RootVT, Res);
}
// Typically from here on, we need an integer version of MaskVT.
if (matchVectorShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx,
Zeroable)) {
if (Depth == 1 && Root.getOpcode() == X86ISD::EXTRQI)
- return false; // Nothing to do!
+ return SDValue(); // Nothing to do!
V1 = DAG.getBitcast(IntMaskVT, V1);
DCI.AddToWorklist(V1.getNode());
Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1,
DAG.getConstant(BitLen, DL, MVT::i8),
DAG.getConstant(BitIdx, DL, MVT::i8));
DCI.AddToWorklist(Res.getNode());
- DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
- /*AddTo*/ true);
- return true;
+ return DAG.getBitcast(RootVT, Res);
}
if (matchVectorShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) {
if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI)
- return false; // Nothing to do!
+ return SDValue(); // Nothing to do!
V1 = DAG.getBitcast(IntMaskVT, V1);
DCI.AddToWorklist(V1.getNode());
V2 = DAG.getBitcast(IntMaskVT, V2);
DAG.getConstant(BitLen, DL, MVT::i8),
DAG.getConstant(BitIdx, DL, MVT::i8));
DCI.AddToWorklist(Res.getNode());
- DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
- /*AddTo*/ true);
- return true;
+ return DAG.getBitcast(RootVT, Res);
}
}
// Don't try to re-form single instruction chains under any circumstances now
// that we've done encoding canonicalization for them.
if (Depth < 2)
- return false;
+ return SDValue();
bool MaskContainsZeros =
any_of(Mask, [](int M) { return M == SM_SentinelZero; });
DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(X86ISD::VPERMV, DL, MaskVT, VPermMask, Res);
DCI.AddToWorklist(Res.getNode());
- DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
- /*AddTo*/ true);
- return true;
+ return DAG.getBitcast(RootVT, Res);
}
// Lower a unary+zero lane-crossing shuffle as VPERMV3 with a zero
DCI.AddToWorklist(Zero.getNode());
Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, Res, VPermMask, Zero);
DCI.AddToWorklist(Res.getNode());
- DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
- /*AddTo*/ true);
- return true;
+ return DAG.getBitcast(RootVT, Res);
}
// If we have a dual input lane-crossing shuffle then lower to VPERMV3.
DCI.AddToWorklist(V2.getNode());
Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, V1, VPermMask, V2);
DCI.AddToWorklist(Res.getNode());
- DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
- /*AddTo*/ true);
- return true;
+ return DAG.getBitcast(RootVT, Res);
}
- return false;
+ return SDValue();
}
// See if we can combine a single input shuffle with zeros to a bit-mask,
FloatDomain ? unsigned(X86ISD::FAND) : unsigned(ISD::AND);
Res = DAG.getNode(AndOpcode, DL, MaskVT, Res, BitMask);
DCI.AddToWorklist(Res.getNode());
- DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
- /*AddTo*/ true);
- return true;
+ return DAG.getBitcast(RootVT, Res);
}
// If we have a single input shuffle with different shuffle patterns in the
DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(X86ISD::VPERMILPV, DL, MaskVT, Res, VPermMask);
DCI.AddToWorklist(Res.getNode());
- DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
- /*AddTo*/ true);
- return true;
+ return DAG.getBitcast(RootVT, Res);
}
// With XOP, binary shuffles of 128/256-bit floating point vectors can combine
Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp,
DAG.getConstant(M2ZImm, DL, MVT::i8));
DCI.AddToWorklist(Res.getNode());
- DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
- /*AddTo*/ true);
- return true;
+ return DAG.getBitcast(RootVT, Res);
}
// If we have 3 or more shuffle instructions or a chain involving a variable
DCI.AddToWorklist(PSHUFBMaskOp.getNode());
Res = DAG.getNode(X86ISD::PSHUFB, DL, ByteVT, Res, PSHUFBMaskOp);
DCI.AddToWorklist(Res.getNode());
- DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
- /*AddTo*/ true);
- return true;
+ return DAG.getBitcast(RootVT, Res);
}
// With XOP, if we have a 128-bit binary input shuffle we can always combine
DCI.AddToWorklist(VPPERMMaskOp.getNode());
Res = DAG.getNode(X86ISD::VPPERM, DL, ByteVT, V1, V2, VPPERMMaskOp);
DCI.AddToWorklist(Res.getNode());
- DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
- /*AddTo*/ true);
- return true;
+ return DAG.getBitcast(RootVT, Res);
}
// Failed to find any combines.
- return false;
+ return SDValue();
}
// Attempt to constant fold all of the constant source ops.
std::swap(Ops[0], Ops[1]);
}
- return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask, DAG,
- DCI, Subtarget);
+ // Finally, try to combine into a single shuffle instruction.
+ SDValue Res = combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask, DAG,
+ DCI, Subtarget);
+ if (Res) {
+ DCI.CombineTo(Root.getNode(), Res, /*AddTo*/ true);
+ return true;
+ }
+ return false;
}
/// \brief Get the PSHUF-style mask from a PSHUF node.