static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
+ if (!Subtarget.hasSSE2())
+ return SDValue();
+
EVT SVT = VT.getScalarType();
EVT InVT = In.getValueType();
EVT InSVT = InVT.getScalarType();
// Emit a VPACKUSDW+VPERMQ followed by a VPMOVUSWB.
SDValue Mid = truncateVectorWithPACK(X86ISD::PACKUS, MVT::v16i16, USatVal,
DL, DAG, Subtarget);
+ assert(Mid && "Failed to pack!");
return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, Mid);
}
}
(InSVT == MVT::i16 || InSVT == MVT::i32)) {
if (auto USatVal = detectSSatPattern(In, VT, true)) {
// vXi32 -> vXi8 must be performed as PACKUSWB(PACKSSDW,PACKSSDW).
- if (SVT == MVT::i8 && InSVT == MVT::i32) {
+ // Only do this when the result is at least 64 bits or we'll be leaving
+ // dangling PACKSSDW nodes.
+ if (SVT == MVT::i8 && InSVT == MVT::i32 &&
+ VT.getVectorNumElements() >= 8) {
EVT MidVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
VT.getVectorNumElements());
SDValue Mid = truncateVectorWithPACK(X86ISD::PACKSS, MidVT, USatVal, DL,
DAG, Subtarget);
- if (Mid)
- return truncateVectorWithPACK(X86ISD::PACKUS, VT, Mid, DL, DAG,
- Subtarget);
+ assert(Mid && "Failed to pack!");
+ SDValue V = truncateVectorWithPACK(X86ISD::PACKUS, VT, Mid, DL, DAG,
+ Subtarget);
+ assert(V && "Failed to pack!");
+ return V;
} else if (SVT == MVT::i8 || Subtarget.hasSSE41())
return truncateVectorWithPACK(X86ISD::PACKUS, VT, USatVal, DL, DAG,
Subtarget);