Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i);
}
-void DecodeEXTRQIMask(int Len, int Idx,
+void DecodeEXTRQIMask(MVT VT, int Len, int Idx,
SmallVectorImpl<int> &ShuffleMask) {
+ assert(VT.is128BitVector() && "Expected 128-bit vector");
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSize = VT.getScalarSizeInBits();
+ unsigned HalfElts = NumElts / 2;
+
// Only the bottom 6 bits are valid for each immediate.
Len &= 0x3F;
Idx &= 0x3F;
// We can only decode this bit extraction instruction as a shuffle if both the
- // length and index work with whole bytes.
- if (0 != (Len % 8) || 0 != (Idx % 8))
+ // length and index work with whole elements.
+ if (0 != (Len % EltSize) || 0 != (Idx % EltSize))
return;
// A length of zero is equivalent to a bit length of 64.
// If the length + index exceeds the bottom 64 bits the result is undefined.
if ((Len + Idx) > 64) {
- ShuffleMask.append(16, SM_SentinelUndef);
+ ShuffleMask.append(NumElts, SM_SentinelUndef);
return;
}
- // Convert index and index to work with bytes.
- Len /= 8;
- Idx /= 8;
+ // Convert index and index to work with elements.
+ Len /= EltSize;
+ Idx /= EltSize;
- // EXTRQ: Extract Len bytes starting from Idx. Zero pad the remaining bytes
- // of the lower 64-bits. The upper 64-bits are undefined.
+ // EXTRQ: Extract Len elements starting from Idx. Zero pad the remaining
+ // elements of the lower 64-bits. The upper 64-bits are undefined.
for (int i = 0; i != Len; ++i)
ShuffleMask.push_back(i + Idx);
- for (int i = Len; i != 8; ++i)
+ for (int i = Len; i != HalfElts; ++i)
ShuffleMask.push_back(SM_SentinelZero);
- for (int i = 8; i != 16; ++i)
+ for (int i = HalfElts; i != NumElts; ++i)
ShuffleMask.push_back(SM_SentinelUndef);
}
-void DecodeINSERTQIMask(int Len, int Idx,
+void DecodeINSERTQIMask(MVT VT, int Len, int Idx,
SmallVectorImpl<int> &ShuffleMask) {
+ assert(VT.is128BitVector() && "Expected 128-bit vector");
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSize = VT.getScalarSizeInBits();
+ unsigned HalfElts = NumElts / 2;
+
// Only the bottom 6 bits are valid for each immediate.
Len &= 0x3F;
Idx &= 0x3F;
// We can only decode this bit insertion instruction as a shuffle if both the
- // length and index work with whole bytes.
- if (0 != (Len % 8) || 0 != (Idx % 8))
+ // length and index work with whole elements.
+ if (0 != (Len % EltSize) || 0 != (Idx % EltSize))
return;
// A length of zero is equivalent to a bit length of 64.
// If the length + index exceeds the bottom 64 bits the result is undefined.
if ((Len + Idx) > 64) {
- ShuffleMask.append(16, SM_SentinelUndef);
+ ShuffleMask.append(NumElts, SM_SentinelUndef);
return;
}
- // Convert index and index to work with bytes.
- Len /= 8;
- Idx /= 8;
+ // Convert index and index to work with elements.
+ Len /= EltSize;
+ Idx /= EltSize;
- // INSERTQ: Extract lowest Len bytes from lower half of second source and
- // insert over first source starting at Idx byte. The upper 64-bits are
+ // INSERTQ: Extract lowest Len elements from lower half of second source and
+ // insert over first source starting at Idx element. The upper 64-bits are
// undefined.
for (int i = 0; i != Idx; ++i)
ShuffleMask.push_back(i);
for (int i = 0; i != Len; ++i)
- ShuffleMask.push_back(i + 16);
- for (int i = Idx + Len; i != 8; ++i)
+ ShuffleMask.push_back(i + NumElts);
+ for (int i = Idx + Len; i != HalfElts; ++i)
ShuffleMask.push_back(i);
- for (int i = 8; i != 16; ++i)
+ for (int i = HalfElts; i != NumElts; ++i)
ShuffleMask.push_back(SM_SentinelUndef);
}
void DecodeScalarMoveMask(MVT VT, bool IsLoad,
SmallVectorImpl<int> &ShuffleMask);
-/// Decode a SSE4A EXTRQ instruction as a v16i8 shuffle mask.
-void DecodeEXTRQIMask(int Len, int Idx,
+/// Decode a SSE4A EXTRQ instruction as a shuffle mask.
+void DecodeEXTRQIMask(MVT VT, int Len, int Idx,
SmallVectorImpl<int> &ShuffleMask);
-/// Decode a SSE4A INSERTQ instruction as a v16i8 shuffle mask.
-void DecodeINSERTQIMask(int Len, int Idx,
+/// Decode a SSE4A INSERTQ instruction as a shuffle mask.
+void DecodeINSERTQIMask(MVT VT, int Len, int Idx,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERMILPD/VPERMILPS variable mask from a raw array of constants.
isa<ConstantSDNode>(N->getOperand(2))) {
int BitLen = N->getConstantOperandVal(1);
int BitIdx = N->getConstantOperandVal(2);
- DecodeEXTRQIMask(BitLen, BitIdx, Mask);
+ DecodeEXTRQIMask(VT, BitLen, BitIdx, Mask);
IsUnary = true;
}
break;
isa<ConstantSDNode>(N->getOperand(3))) {
int BitLen = N->getConstantOperandVal(2);
int BitIdx = N->getConstantOperandVal(3);
- DecodeINSERTQIMask(BitLen, BitIdx, Mask);
+ DecodeINSERTQIMask(VT, BitLen, BitIdx, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
}
break;