/// would simplify under the threshold for PSHUFB formation because of
/// combine-ordering. To fix this, we should do the redundant instruction
/// combining in this recursive walk.
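+/// Returns the combined shuffle as an SDValue on success, or an empty
+/// SDValue if no combine was found; callers are now responsible for
+/// replacing the root node themselves (e.g. via DCI.CombineTo).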
-static bool combineX86ShufflesRecursively(
+static SDValue combineX86ShufflesRecursively(
ArrayRef<SDValue> SrcOps, int SrcOpIndex, SDValue Root,
ArrayRef<int> RootMask, ArrayRef<const SDNode *> SrcNodes, int Depth,
bool HasVariableMask, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) {
// Bound the depth of our recursive combine because this is ultimately
// quadratic in nature.
if (Depth > 8)
- return false;
+ return SDValue();
// Directly rip through bitcasts to find the underlying operand.
SDValue Op = SrcOps[SrcOpIndex];
MVT VT = Op.getSimpleValueType();
if (!VT.isVector())
- return false; // Bail if we hit a non-vector.
+ return SDValue(); // Bail if we hit a non-vector.
assert(Root.getSimpleValueType().isVector() &&
"Shuffles operate on vector types!");
SmallVector<int, 64> OpMask;
SmallVector<SDValue, 2> OpInputs;
if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask, DAG))
- return false;
+ return SDValue();
assert(OpInputs.size() <= 2 && "Too many shuffle inputs");
SDValue Input0 = (OpInputs.size() > 0 ? OpInputs[0] : SDValue());
}
// Handle the all undef/zero cases early.
- if (all_of(Mask, [](int Idx) { return Idx == SM_SentinelUndef; })) {
- DCI.CombineTo(Root.getNode(), DAG.getUNDEF(Root.getValueType()));
- return true;
- }
- if (all_of(Mask, [](int Idx) { return Idx < 0; })) {
- // TODO - should we handle the mixed zero/undef case as well? Just returning
- // a zero mask will lose information on undef elements possibly reducing
- // future combine possibilities.
- DCI.CombineTo(Root.getNode(), getZeroVector(Root.getSimpleValueType(),
- Subtarget, DAG, SDLoc(Root)));
- return true;
- }
+ if (all_of(Mask, [](int Idx) { return Idx == SM_SentinelUndef; }))
+ return DAG.getUNDEF(Root.getValueType());
+
+ // TODO - should we handle the mixed zero/undef case as well? Just returning
+ // a zero mask will lose information on undef elements possibly reducing
+ // future combine possibilities.
+ if (all_of(Mask, [](int Idx) { return Idx < 0; }))
+ return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG,
+ SDLoc(Root));
// Remove unused shuffle source ops.
resolveTargetShuffleInputsAndMask(Ops, Mask);
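// Only recurse into an op if it has a single use, or if all of its users
// already belong to the chain of nodes being combined.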
for (int i = 0, e = Ops.size(); i < e; ++i)
if (Ops[i].getNode()->hasOneUse() ||
SDNode::areOnlyUsersOf(CombinedNodes, Ops[i].getNode()))
- if (combineX86ShufflesRecursively(Ops, i, Root, Mask, CombinedNodes,
- Depth + 1, HasVariableMask, DAG, DCI,
- Subtarget))
- return true;
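+ // Bubble a successful combine back up as an SDValue; the top-level
+ // caller now decides how to replace the root node.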
+ if (SDValue Res = combineX86ShufflesRecursively(
+ Ops, i, Root, Mask, CombinedNodes, Depth + 1, HasVariableMask,
+ DAG, DCI, Subtarget))
+ return Res;
// Attempt to constant fold all of the constant source ops.
if (SDValue Cst = combineX86ShufflesConstants(
- Ops, Mask, Root, HasVariableMask, DAG, DCI, Subtarget)) {
- DCI.CombineTo(Root.getNode(), Cst);
- return true;
- }
+ Ops, Mask, Root, HasVariableMask, DAG, DCI, Subtarget))
+ return Cst;
// We can only combine unary and binary shuffle mask cases.
if (Ops.size() > 2)
- return false;
+ return SDValue();
// Minor canonicalization of the accumulated shuffle mask to make it easier
// to match below. All this does is detect masks with sequential pairs of
// elements, and shrink them to the half-width mask.
}
// Finally, try to combine into a single shuffle instruction.
- if (SDValue Res = combineX86ShuffleChain(
- Ops, Root, Mask, Depth, HasVariableMask, DAG, DCI, Subtarget)) {
- DCI.CombineTo(Root.getNode(), Res, /*AddTo*/ true);
- return true;
- }
- return false;
+ return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask, DAG,
+ DCI, Subtarget);
}
/// \brief Get the PSHUF-style mask from PSHUF node.
// Try recursively combining arbitrary sequences of x86 shuffle
// instructions into higher-order shuffles. We do this after combining
// specific PSHUF instruction sequences into their minimal form so that we
// can evaluate how many specialized shuffle instructions are involved in
// a particular chain.
- if (combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, DCI,
- Subtarget))
- return SDValue(); // This routine will use CombineTo to replace N.
+ if (SDValue Res = combineX86ShufflesRecursively(
+ {Op}, 0, Op, {0}, {}, /*Depth*/ 1,
+ /*HasVarMask*/ false, DAG, DCI, Subtarget)) {
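+ // The recursive combine no longer replaces nodes itself, so commit the
+ // result here and return SDValue() to signal that N has been handled.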
+ DCI.CombineTo(N, Res);
+ return SDValue();
+ }
}
return SDValue();
// Attempt to combine as shuffle.
SDValue Op(N, 0);
- combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, DCI, Subtarget);
+ if (SDValue Res = combineX86ShufflesRecursively(
+ {Op}, 0, Op, {0}, {}, /*Depth*/ 1,
+ /*HasVarMask*/ false, DAG, DCI, Subtarget)) {
+ DCI.CombineTo(N, Res);
+ return SDValue();
+ }
+
return SDValue();
}
// We can decode 'whole byte' logical bit shifts as shuffles.
if (LogicalShift && (ShiftVal.getZExtValue() % 8) == 0) {
SDValue Op(N, 0);
- if (combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, DCI,
- Subtarget))
- return SDValue(); // This routine will use CombineTo to replace N.
+ if (SDValue Res = combineX86ShufflesRecursively(
+ {Op}, 0, Op, {0}, {}, /*Depth*/ 1,
+ /*HasVarMask*/ false, DAG, DCI, Subtarget)) {
+ DCI.CombineTo(N, Res);
+ return SDValue();
+ }
}
// Constant Folding.
// Attempt to combine PINSRB/PINSRW patterns to a shuffle.
SDValue Op(N, 0);
- combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, DCI, Subtarget);
+ if (SDValue Res = combineX86ShufflesRecursively(
+ {Op}, 0, Op, {0}, {}, /*Depth*/ 1,
+ /*HasVarMask*/ false, DAG, DCI, Subtarget)) {
+ DCI.CombineTo(N, Res);
+ return SDValue();
+ }
+
return SDValue();
}
// Attempt to recursively combine a bitmask AND with shuffles.
if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
SDValue Op(N, 0);
- if (combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, DCI,
- Subtarget))
- return SDValue(); // This routine will use CombineTo to replace N.
+ if (SDValue Res = combineX86ShufflesRecursively(
+ {Op}, 0, Op, {0}, {}, /*Depth*/ 1,
+ /*HasVarMask*/ false, DAG, DCI, Subtarget)) {
+ DCI.CombineTo(N, Res);
+ return SDValue();
+ }
}
return SDValue();
// Attempt to recursively combine a bitmask ANDNP with shuffles.
if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
SDValue Op(N, 0);
- if (combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, DCI,
- Subtarget))
- return SDValue(); // This routine will use CombineTo to replace N.
+ if (SDValue Res = combineX86ShufflesRecursively(
+ {Op}, 0, Op, {0}, {}, /*Depth*/ 1,
+ /*HasVarMask*/ false, DAG, DCI, Subtarget)) {
+ DCI.CombineTo(N, Res);
+ return SDValue();
+ }
}
return SDValue();