From bb23800c9f3decfdbf2d1c6cc966160953262329 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 4 Jul 2017 16:53:12 +0000 Subject: [PATCH] [X86][SSE4A] Generalized EXTRQI/INSERTQI shuffle decodes The existing decodes only worked for v16i8 vectors, this adds support for any 128-bit vector git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@307095 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../X86/InstPrinter/X86InstComments.cpp | 4 +- lib/Target/X86/Utils/X86ShuffleDecode.cpp | 56 +++++++++++-------- lib/Target/X86/Utils/X86ShuffleDecode.h | 8 +-- lib/Target/X86/X86ISelLowering.cpp | 4 +- 4 files changed, 41 insertions(+), 31 deletions(-) diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 5e809c34325..f5f3a4cc83d 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -1038,7 +1038,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::EXTRQI: if (MI->getOperand(2).isImm() && MI->getOperand(3).isImm()) - DecodeEXTRQIMask(MI->getOperand(2).getImm(), + DecodeEXTRQIMask(MVT::v16i8, MI->getOperand(2).getImm(), MI->getOperand(3).getImm(), ShuffleMask); @@ -1049,7 +1049,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::INSERTQI: if (MI->getOperand(3).isImm() && MI->getOperand(4).isImm()) - DecodeINSERTQIMask(MI->getOperand(3).getImm(), + DecodeINSERTQIMask(MVT::v16i8, MI->getOperand(3).getImm(), MI->getOperand(4).getImm(), ShuffleMask); diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 1be5aec849f..de7914360fd 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -452,15 +452,20 @@ void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl &Mask) { Mask.push_back(IsLoad ? 
static_cast(SM_SentinelZero) : i); } -void DecodeEXTRQIMask(int Len, int Idx, +void DecodeEXTRQIMask(MVT VT, int Len, int Idx, SmallVectorImpl &ShuffleMask) { + assert(VT.is128BitVector() && "Expected 128-bit vector"); + unsigned NumElts = VT.getVectorNumElements(); + unsigned EltSize = VT.getScalarSizeInBits(); + unsigned HalfElts = NumElts / 2; + // Only the bottom 6 bits are valid for each immediate. Len &= 0x3F; Idx &= 0x3F; // We can only decode this bit extraction instruction as a shuffle if both the - // length and index work with whole bytes. - if (0 != (Len % 8) || 0 != (Idx % 8)) + // length and index work with whole elements. + if (0 != (Len % EltSize) || 0 != (Idx % EltSize)) return; // A length of zero is equivalent to a bit length of 64. @@ -469,33 +474,38 @@ void DecodeEXTRQIMask(int Len, int Idx, // If the length + index exceeds the bottom 64 bits the result is undefined. if ((Len + Idx) > 64) { - ShuffleMask.append(16, SM_SentinelUndef); + ShuffleMask.append(NumElts, SM_SentinelUndef); return; } - // Convert index and index to work with bytes. - Len /= 8; - Idx /= 8; + // Convert length and index to work with elements. + Len /= EltSize; + Idx /= EltSize; - // EXTRQ: Extract Len bytes starting from Idx. Zero pad the remaining bytes - // of the lower 64-bits. The upper 64-bits are undefined. + // EXTRQ: Extract Len elements starting from Idx. Zero pad the remaining + // elements of the lower 64-bits. The upper 64-bits are undefined. 
for (int i = 0; i != Len; ++i) ShuffleMask.push_back(i + Idx); - for (int i = Len; i != 8; ++i) + for (int i = Len; i != HalfElts; ++i) ShuffleMask.push_back(SM_SentinelZero); - for (int i = 8; i != 16; ++i) + for (int i = HalfElts; i != NumElts; ++i) ShuffleMask.push_back(SM_SentinelUndef); } -void DecodeINSERTQIMask(int Len, int Idx, +void DecodeINSERTQIMask(MVT VT, int Len, int Idx, SmallVectorImpl &ShuffleMask) { + assert(VT.is128BitVector() && "Expected 128-bit vector"); + unsigned NumElts = VT.getVectorNumElements(); + unsigned EltSize = VT.getScalarSizeInBits(); + unsigned HalfElts = NumElts / 2; + // Only the bottom 6 bits are valid for each immediate. Len &= 0x3F; Idx &= 0x3F; // We can only decode this bit insertion instruction as a shuffle if both the - // length and index work with whole bytes. - if (0 != (Len % 8) || 0 != (Idx % 8)) + // length and index work with whole elements. + if (0 != (Len % EltSize) || 0 != (Idx % EltSize)) return; // A length of zero is equivalent to a bit length of 64. @@ -504,24 +514,24 @@ void DecodeINSERTQIMask(int Len, int Idx, // If the length + index exceeds the bottom 64 bits the result is undefined. if ((Len + Idx) > 64) { - ShuffleMask.append(16, SM_SentinelUndef); + ShuffleMask.append(NumElts, SM_SentinelUndef); return; } - // Convert index and index to work with bytes. - Len /= 8; - Idx /= 8; + // Convert length and index to work with elements. + Len /= EltSize; + Idx /= EltSize; - // INSERTQ: Extract lowest Len bytes from lower half of second source and - // insert over first source starting at Idx byte. The upper 64-bits are + // INSERTQ: Extract lowest Len elements from lower half of second source and + // insert over first source starting at Idx element. The upper 64-bits are // undefined. 
for (int i = 0; i != Idx; ++i) ShuffleMask.push_back(i); for (int i = 0; i != Len; ++i) - ShuffleMask.push_back(i + 16); - for (int i = Idx + Len; i != 8; ++i) + ShuffleMask.push_back(i + NumElts); + for (int i = Idx + Len; i != HalfElts; ++i) ShuffleMask.push_back(i); - for (int i = 8; i != 16; ++i) + for (int i = HalfElts; i != NumElts; ++i) ShuffleMask.push_back(SM_SentinelUndef); } diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 17619d09d05..251c9f7558e 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -134,12 +134,12 @@ void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl &ShuffleMask); void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl &ShuffleMask); -/// Decode a SSE4A EXTRQ instruction as a v16i8 shuffle mask. -void DecodeEXTRQIMask(int Len, int Idx, +/// Decode a SSE4A EXTRQ instruction as a shuffle mask. +void DecodeEXTRQIMask(MVT VT, int Len, int Idx, SmallVectorImpl &ShuffleMask); -/// Decode a SSE4A INSERTQ instruction as a v16i8 shuffle mask. -void DecodeINSERTQIMask(int Len, int Idx, +/// Decode a SSE4A INSERTQ instruction as a shuffle mask. +void DecodeINSERTQIMask(MVT VT, int Len, int Idx, SmallVectorImpl &ShuffleMask); /// Decode a VPERMILPD/VPERMILPS variable mask from a raw array of constants. 
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1f4bc356943..5fefaf4b644 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5561,7 +5561,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, isa(N->getOperand(2))) { int BitLen = N->getConstantOperandVal(1); int BitIdx = N->getConstantOperandVal(2); - DecodeEXTRQIMask(BitLen, BitIdx, Mask); + DecodeEXTRQIMask(VT, BitLen, BitIdx, Mask); IsUnary = true; } break; @@ -5570,7 +5570,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, isa(N->getOperand(3))) { int BitLen = N->getConstantOperandVal(2); int BitIdx = N->getConstantOperandVal(3); - DecodeINSERTQIMask(BitLen, BitIdx, Mask); + DecodeINSERTQIMask(VT, BitLen, BitIdx, Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); } break; -- 2.40.0