Zeroable, Subtarget, DAG))
return Blend;
- bool IsV1Zero = ISD::isBuildVectorAllZeros(V1.getNode());
- bool IsV2Zero = ISD::isBuildVectorAllZeros(V2.getNode());
+ bool IsLowZero = (Zeroable & 0x3) == 0x3;
+ bool IsHighZero = (Zeroable & 0xc) == 0xc;
// If either input operand is a zero vector, use VPERM2X128 because its mask
// allows us to replace the zero input with an implicit zero.
- if (!IsV1Zero && !IsV2Zero) {
+ if (!IsLowZero && !IsHighZero) {
// Check for patterns which can be matched with a single insert of a 128-bit
// subvector.
bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1});
// [6] - ignore
// [7] - zero high half of destination
- int MaskLO = WidenedMask[0] < 0 ? 0 : WidenedMask[0];
- int MaskHI = WidenedMask[1] < 0 ? 0 : WidenedMask[1];
+ assert(WidenedMask[0] >= 0 && WidenedMask[1] >= 0 && "Undef half?");
- unsigned PermMask = MaskLO | (MaskHI << 4);
+ unsigned PermMask = 0;
+ PermMask |= IsLowZero ? 0x08 : (WidenedMask[0] << 0);
+ PermMask |= IsHighZero ? 0x80 : (WidenedMask[1] << 4);
- // If either input is a zero vector, replace it with an undef input.
- // Shuffle mask values < 4 are selecting elements of V1.
- // Shuffle mask values >= 4 are selecting elements of V2.
- // Adjust each half of the permute mask by clearing the half that was
- // selecting the zero vector and setting the zero mask bit.
- if (IsV1Zero) {
+ // Check the immediate mask and replace unused sources with undef.
+ if ((PermMask & 0x0a) != 0x00 && (PermMask & 0xa0) != 0x00)
V1 = DAG.getUNDEF(VT);
- if (MaskLO < 2)
- PermMask = (PermMask & 0xf0) | 0x08;
- if (MaskHI < 2)
- PermMask = (PermMask & 0x0f) | 0x80;
- }
- if (IsV2Zero) {
+ if ((PermMask & 0x0a) != 0x02 && (PermMask & 0xa0) != 0x20)
V2 = DAG.getUNDEF(VT);
- if (MaskLO >= 2)
- PermMask = (PermMask & 0xf0) | 0x08;
- if (MaskHI >= 2)
- PermMask = (PermMask & 0x0f) | 0x80;
- }
return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2,
DAG.getConstant(PermMask, DL, MVT::i8));