From 4a31d77be24019164745d0d160cab7fb229b8250 Mon Sep 17 00:00:00 2001 From: Dmitry Preobrazhensky Date: Wed, 31 May 2017 16:26:47 +0000 Subject: [PATCH] [AMDGPU][MC] New syntax for ds_swizzle_b32 offset See Bug 28601: https://bugs.llvm.org//show_bug.cgi?id=28601 Reviewers: artem.tamazov, vpykhtin Differential Revision: https://reviews.llvm.org/D33542 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@304309 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 324 +++++++++++++++++- lib/Target/AMDGPU/DSInstructions.td | 6 +- .../AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp | 106 ++++++ .../AMDGPU/InstPrinter/AMDGPUInstPrinter.h | 2 + lib/Target/AMDGPU/SIDefines.h | 40 +++ lib/Target/AMDGPU/SIInstrInfo.td | 13 + lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp | 13 + lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h | 6 + test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll | 2 +- test/MC/AMDGPU/ds-err.s | 90 +++++ test/MC/AMDGPU/ds.s | 144 +++++++- test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt | 20 +- 12 files changed, 746 insertions(+), 20 deletions(-) diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index f5541e08e1b..cc68c971b24 100644 --- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -161,7 +161,8 @@ public: ImmTyOpSel, ImmTyOpSelHi, ImmTyNegLo, - ImmTyNegHi + ImmTyNegHi, + ImmTySwizzle }; struct TokOp { @@ -474,6 +475,7 @@ public: bool isSWaitCnt() const; bool isHwreg() const; bool isSendMsg() const; + bool isSwizzle() const; bool isSMRDOffset8() const; bool isSMRDOffset20() const; bool isSMRDLiteralOffset() const; @@ -659,6 +661,7 @@ public: case ImmTyOpSelHi: OS << "OpSelHi"; break; case ImmTyNegLo: OS << "NegLo"; break; case ImmTyNegHi: OS << "NegHi"; break; + case ImmTySwizzle: OS << "Swizzle"; break; } } @@ -994,6 +997,12 @@ private: bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; + bool trySkipId(const StringRef Id); + bool trySkipToken(const AsmToken::TokenKind Kind); + bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); + bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); + bool parseExpr(int64_t &Imm); + public: OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); @@ -1003,6 +1012,19 @@ public: OperandMatchResultTy parseInterpAttr(OperandVector &Operands); OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); + bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, + const unsigned MinVal, + const unsigned MaxVal, + const StringRef ErrMsg); + OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); + bool parseSwizzleOffset(int64_t &Imm); + bool parseSwizzleMacro(int64_t &Imm); + bool parseSwizzleQuadPerm(int64_t &Imm); + bool parseSwizzleBitmaskPerm(int64_t &Imm); + bool parseSwizzleBroadcast(int64_t &Imm); + bool parseSwizzleSwap(int64_t &Imm); + bool parseSwizzleReverse(int64_t &Imm); + void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } @@ -2785,7 +2807,13 @@ void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, OptionalIdx[Op.getImmTy()] = i; } - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); + AMDGPUOperand::ImmTy OffsetType = + (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si || + Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle : + AMDGPUOperand::ImmTyOffset; + + addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); + if (!IsGdsHardcoded) { addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); } @@ -3383,6 +3411,298 @@ bool AMDGPUOperand::isSendMsg() const { return isImmTy(ImmTySendMsg); } +//===----------------------------------------------------------------------===// +// parser helpers +//===----------------------------------------------------------------------===// + +bool +AMDGPUAsmParser::trySkipId(const StringRef Id) { + if (getLexer().getKind() == AsmToken::Identifier && + Parser.getTok().getString() == Id) { + Parser.Lex(); + return true; + } + return false; +} + +bool +AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { + if (getLexer().getKind() == Kind) { + Parser.Lex(); + return true; + } + return false; +} + +bool +AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, + const StringRef ErrMsg) { + if (!trySkipToken(Kind)) { + Error(Parser.getTok().getLoc(), ErrMsg); + return false; + } + return true; +} + +bool +AMDGPUAsmParser::parseExpr(int64_t &Imm) { + return !getParser().parseAbsoluteExpression(Imm); +} + +bool +AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { + SMLoc S = Parser.getTok().getLoc(); + if (getLexer().getKind() == AsmToken::String) { + Val = Parser.getTok().getStringContents(); + Parser.Lex(); + return true; + } else { + Error(S, ErrMsg); + return false; + } +} + +//===----------------------------------------------------------------------===// +// swizzle +//===----------------------------------------------------------------------===// + +LLVM_READNONE +static unsigned +encodeBitmaskPerm(const unsigned AndMask, + const unsigned OrMask, + const unsigned XorMask) { + using namespace llvm::AMDGPU::Swizzle; + + return BITMASK_PERM_ENC | + (AndMask << BITMASK_AND_SHIFT) | + (OrMask << BITMASK_OR_SHIFT) | + (XorMask << BITMASK_XOR_SHIFT); +} + +bool +AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, + const unsigned MinVal, + const unsigned MaxVal, + const StringRef ErrMsg) { + for (unsigned i = 0; i < OpNum; ++i) { + if (!skipToken(AsmToken::Comma, "expected a comma")){ + return false; + } + SMLoc ExprLoc = Parser.getTok().getLoc(); + if (!parseExpr(Op[i])) { + return false; + } + if (Op[i] < MinVal || Op[i] > MaxVal) { + Error(ExprLoc, ErrMsg); + return false; + } + } + + return true; +} + +bool +AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { + using namespace llvm::AMDGPU::Swizzle; + + int64_t Lane[LANE_NUM]; + if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, + "expected a 2-bit lane id")) { + Imm = QUAD_PERM_ENC; + for (auto i = 0; i < LANE_NUM; ++i) { + Imm |= Lane[i] << (LANE_SHIFT * i); + } + return true; + } + return false; +} + +bool +AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { + using namespace llvm::AMDGPU::Swizzle; + + SMLoc S = Parser.getTok().getLoc(); + int64_t GroupSize; + int64_t LaneIdx; + + if (!parseSwizzleOperands(1, &GroupSize, + 2, 32, + "group size must be in the interval [2,32]")) { + return false; + } + if (!isPowerOf2_64(GroupSize)) { + Error(S, "group size must be a power of two"); + return false; + } + if (parseSwizzleOperands(1, &LaneIdx, + 0, GroupSize - 1, + "lane id must be in the interval [0,group size - 1]")) { + Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); + return true; + } + return false; +} + +bool +AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { + using namespace llvm::AMDGPU::Swizzle; + + SMLoc S = Parser.getTok().getLoc(); + int64_t GroupSize; + + if (!parseSwizzleOperands(1, &GroupSize, + 2, 32, "group size must be in the interval [2,32]")) { + return false; + } + if (!isPowerOf2_64(GroupSize)) { + Error(S, "group size must be a power of two"); + return false; + } + + Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); + return true; +} + +bool +AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { + using namespace llvm::AMDGPU::Swizzle; + + SMLoc S = Parser.getTok().getLoc(); + int64_t GroupSize; + + if (!parseSwizzleOperands(1, &GroupSize, + 1, 16, "group size must be in the interval [1,16]")) { + return false; + } + if (!isPowerOf2_64(GroupSize)) { + Error(S, "group size must be a power of two"); + return false; + } + + Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); + return true; +} + +bool +AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { + using namespace llvm::AMDGPU::Swizzle; + + if (!skipToken(AsmToken::Comma, "expected a comma")) { + return false; + } + + StringRef Ctl; + SMLoc StrLoc = Parser.getTok().getLoc(); + if (!parseString(Ctl)) { + return false; + } + if (Ctl.size() != BITMASK_WIDTH) { + Error(StrLoc, "expected a 5-character mask"); + return false; + } + + unsigned AndMask = 0; + unsigned OrMask = 0; + unsigned XorMask = 0; + + for (size_t i = 0; i < Ctl.size(); ++i) { + unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); + switch(Ctl[i]) { + default: + Error(StrLoc, "invalid mask"); + return false; + case '0': + break; + case '1': + OrMask |= Mask; + break; + case 'p': + AndMask |= Mask; + break; + case 'i': + AndMask |= Mask; + XorMask |= Mask; + break; + } + } + + Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); + return true; +} + +bool +AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { + + SMLoc OffsetLoc = Parser.getTok().getLoc(); + + if (!parseExpr(Imm)) { + return false; + } + if (!isUInt<16>(Imm)) { + Error(OffsetLoc, "expected a 16-bit offset"); + return false; + } + return true; +} + +bool +AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { + using namespace llvm::AMDGPU::Swizzle; + + if (skipToken(AsmToken::LParen, "expected a left parentheses")) { + + SMLoc ModeLoc = Parser.getTok().getLoc(); + bool Ok = false; + + if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { + Ok = parseSwizzleQuadPerm(Imm); + } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { + Ok = parseSwizzleBitmaskPerm(Imm); + } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { + Ok = parseSwizzleBroadcast(Imm); + } else if (trySkipId(IdSymbolic[ID_SWAP])) { + Ok = parseSwizzleSwap(Imm); + } else if (trySkipId(IdSymbolic[ID_REVERSE])) { + Ok = parseSwizzleReverse(Imm); + } else { + Error(ModeLoc, "expected a swizzle mode"); + } + + return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); + } + + return false; +} + +OperandMatchResultTy +AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { + SMLoc S = Parser.getTok().getLoc(); + int64_t Imm = 0; + + if (trySkipId("offset")) { + + bool Ok = false; + if (skipToken(AsmToken::Colon, "expected a colon")) { + if (trySkipId("swizzle")) { + Ok = parseSwizzleMacro(Imm); + } else { + Ok = parseSwizzleOffset(Imm); + } + } + + Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); + + return Ok? MatchOperand_Success : MatchOperand_ParseFail; + } else { + return MatchOperand_NoMatch; + } +} + +bool +AMDGPUOperand::isSwizzle() const { + return isImmTy(ImmTySwizzle); +} + //===----------------------------------------------------------------------===// // sopp branch targets //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/DSInstructions.td b/lib/Target/AMDGPU/DSInstructions.td index 357e18108e7..fc516c3b39c 100644 --- a/lib/Target/AMDGPU/DSInstructions.td +++ b/lib/Target/AMDGPU/DSInstructions.td @@ -145,10 +145,10 @@ class DS_1A2D_Off8_RET +class DS_1A_RET : DS_Pseudo { let has_data0 = 0; @@ -440,7 +440,7 @@ def DS_WRITE_SRC2_B32 : DS_1A<"ds_write_src2_b32">; def DS_WRITE_SRC2_B64 : DS_1A<"ds_write_src2_b64">; let Uses = [EXEC], mayLoad = 0, mayStore = 0, isConvergent = 1 in { -def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32">; +def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32", VGPR_32, SwizzleImm>; } let mayStore = 0 in { diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp index a817ff3cbaf..523eea41897 100644 --- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -1160,6 +1160,112 @@ void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo, O << SImm16; // Unknown simm16 code. } +static void printSwizzleBitmask(const uint16_t AndMask, + const uint16_t OrMask, + const uint16_t XorMask, + raw_ostream &O) { + using namespace llvm::AMDGPU::Swizzle; + + uint16_t Probe0 = ((0 & AndMask) | OrMask) ^ XorMask; + uint16_t Probe1 = ((BITMASK_MASK & AndMask) | OrMask) ^ XorMask; + + O << "\""; + + for (unsigned Mask = 1 << (BITMASK_WIDTH - 1); Mask > 0; Mask >>= 1) { + uint16_t p0 = Probe0 & Mask; + uint16_t p1 = Probe1 & Mask; + + if (p0 == p1) { + if (p0 == 0) { + O << "0"; + } else { + O << "1"; + } + } else { + if (p0 == 0) { + O << "p"; + } else { + O << "i"; + } + } + } + + O << "\""; +} + +void AMDGPUInstPrinter::printSwizzle(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + using namespace llvm::AMDGPU::Swizzle; + + uint16_t Imm = MI->getOperand(OpNo).getImm(); + if (Imm == 0) { + return; + } + + O << " offset:"; + + if ((Imm & QUAD_PERM_ENC_MASK) == QUAD_PERM_ENC) { + + O << "swizzle(" << IdSymbolic[ID_QUAD_PERM]; + for (auto i = 0; i < LANE_NUM; ++i) { + O << ","; + O << formatDec(Imm & LANE_MASK); + Imm >>= LANE_SHIFT; + } + O << ")"; + + } else if ((Imm & BITMASK_PERM_ENC_MASK) == BITMASK_PERM_ENC) { + + uint16_t AndMask = (Imm >> BITMASK_AND_SHIFT) & BITMASK_MASK; + uint16_t OrMask = (Imm >> BITMASK_OR_SHIFT) & BITMASK_MASK; + uint16_t XorMask = (Imm >> BITMASK_XOR_SHIFT) & BITMASK_MASK; + + if (AndMask == BITMASK_MAX && + OrMask == 0 && + countPopulation(XorMask) == 1) { + + O << "swizzle(" << IdSymbolic[ID_SWAP]; + O << ","; + O << formatDec(XorMask); + O << ")"; + + } else if (AndMask == BITMASK_MAX && + OrMask == 0 && XorMask > 0 && + isPowerOf2_64(XorMask + 1)) { + + O << "swizzle(" << IdSymbolic[ID_REVERSE]; + O << ","; + O << formatDec(XorMask + 1); + O << ")"; + + } else { + + uint16_t GroupSize = BITMASK_MAX - AndMask + 1; + if (GroupSize > 1 && + isPowerOf2_64(GroupSize) && + OrMask < GroupSize && + XorMask == 0) { + + O << "swizzle(" << IdSymbolic[ID_BROADCAST]; + O << ","; + O << formatDec(GroupSize); + O << ","; + O << formatDec(OrMask); + O << ")"; + + } else { + O << "swizzle(" << IdSymbolic[ID_BITMASK_PERM]; + O << ","; + printSwizzleBitmask(AndMask, OrMask, XorMask, O); + O << ")"; + } + } + } else { + printU16ImmDecOperand(MI, OpNo, O); + } +} + void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h index c0b8e5c5108..c8094c4b840 100644 --- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h +++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h @@ -193,6 +193,8 @@ private: raw_ostream &O); void printSendMsg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printSwizzle(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); void printWaitFlag(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printHwreg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, diff --git a/lib/Target/AMDGPU/SIDefines.h b/lib/Target/AMDGPU/SIDefines.h index 80967edee0a..5cd90323ff6 100644 --- a/lib/Target/AMDGPU/SIDefines.h +++ b/lib/Target/AMDGPU/SIDefines.h @@ -281,6 +281,46 @@ enum WidthMinusOne { // WidthMinusOne, (5) [15:11] } // namespace Hwreg +namespace Swizzle { // Encoding of swizzle macro used in ds_swizzle_b32. + +enum Id { // id of symbolic names + ID_QUAD_PERM = 0, + ID_BITMASK_PERM, + ID_SWAP, + ID_REVERSE, + ID_BROADCAST +}; + +enum EncBits { + + // swizzle mode encodings + + QUAD_PERM_ENC = 0x8000, + QUAD_PERM_ENC_MASK = 0xFF00, + + BITMASK_PERM_ENC = 0x0000, + BITMASK_PERM_ENC_MASK = 0x8000, + + // QUAD_PERM encodings + + LANE_MASK = 0x3, + LANE_MAX = LANE_MASK, + LANE_SHIFT = 2, + LANE_NUM = 4, + + // BITMASK_PERM encodings + + BITMASK_MASK = 0x1F, + BITMASK_MAX = BITMASK_MASK, + BITMASK_WIDTH = 5, + + BITMASK_AND_SHIFT = 0, + BITMASK_OR_SHIFT = 5, + BITMASK_XOR_SHIFT = 10 +}; + +} // namespace Swizzle + namespace SDWA { enum SdwaSel { diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index c5287c7f64b..445bf79a781 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -383,6 +383,14 @@ def SendMsgMatchClass : AsmOperandClass { let RenderMethod = "addImmOperands"; } +def SwizzleMatchClass : AsmOperandClass { + let Name = "Swizzle"; + let PredicateMethod = "isSwizzle"; + let ParserMethod = "parseSwizzleOp"; + let RenderMethod = "addImmOperands"; + let IsOptional = 1; +} + def ExpTgtMatchClass : AsmOperandClass { let Name = "ExpTgt"; let PredicateMethod = "isExpTgt"; @@ -395,6 +403,11 @@ def SendMsgImm : Operand { let ParserMatchClass = SendMsgMatchClass; } +def SwizzleImm : Operand { + let PrintMethod = "printSwizzle"; + let ParserMatchClass = SwizzleMatchClass; +} + def SWaitMatchClass : AsmOperandClass { let Name = "SWaitCnt"; let RenderMethod = "addImmOperands"; diff --git a/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp index b6868de6a74..03b11ae8050 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp +++ b/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp @@ -65,5 +65,18 @@ const char* const IdSymbolic[] = { }; } // namespace Hwreg + +namespace Swizzle { + +// This must be in sync with llvm::AMDGPU::Swizzle::Id enum members, see SIDefines.h. +const char* const IdSymbolic[] = { + "QUAD_PERM", + "BITMASK_PERM", + "SWAP", + "REVERSE", + "BROADCAST", +}; + +} // namespace Swizzle } // namespace AMDGPU } // namespace llvm diff --git a/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h b/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h index b2dc2c0e364..ebb2be22b48 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h +++ b/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h @@ -25,6 +25,12 @@ namespace Hwreg { // Symbolic names for the hwreg(...) syntax. extern const char* const IdSymbolic[]; } // namespace Hwreg + +namespace Swizzle { // Symbolic names for the swizzle(...) syntax. + +extern const char* const IdSymbolic[]; + +} // namespace Swizzle } // namespace AMDGPU } // namespace llvm diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll index a3a78d326a6..02642142ae2 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll @@ -4,7 +4,7 @@ declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #0 ; FUNC-LABEL: {{^}}ds_swizzle: -; CHECK: ds_swizzle_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:100 +; CHECK: ds_swizzle_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:swizzle(BITMASK_PERM,"00p11") ; CHECK: s_waitcnt lgkmcnt define amdgpu_kernel void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) nounwind { %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0 diff --git a/test/MC/AMDGPU/ds-err.s b/test/MC/AMDGPU/ds-err.s index 3951efbb60f..d9f22f5f3ed 100644 --- a/test/MC/AMDGPU/ds-err.s +++ b/test/MC/AMDGPU/ds-err.s @@ -21,3 +21,93 @@ ds_write2_b32 v2, v4, v6 offset0:1000000000 // CHECK: invalid operand for instruction ds_write2_b32 v2, v4, v6 offset1:1000000000 +//===----------------------------------------------------------------------===// +// swizzle +//===----------------------------------------------------------------------===// + +// CHECK: error: expected a colon +ds_swizzle_b32 v8, v2 offset + +// CHECK: error: failed parsing operand +ds_swizzle_b32 v8, v2 offset: + +// CHECK: error: expected a colon +ds_swizzle_b32 v8, v2 offset- + +// CHECK: error: expected absolute expression +ds_swizzle_b32 v8, v2 offset:SWIZZLE(QUAD_PERM, 0, 1, 2, 3) + +// CHECK: error: expected a swizzle mode +ds_swizzle_b32 v8, v2 offset:swizzle(quad_perm, 0, 1, 2, 3) + +// CHECK: error: expected a swizzle mode +ds_swizzle_b32 v8, v2 offset:swizzle(XXX,1) + +// CHECK: error: expected a comma +ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM + +// CHECK: error: expected a comma +ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2) + +// CHECK: error: expected a closing parentheses +ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2, 3 + +// CHECK: error: expected a closing parentheses +ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2, 3, 4) + +// CHECK: error: expected a 2-bit lane id +ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, -1, 1, 2, 3) + +// CHECK: error: expected a 2-bit lane id +ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 4, 1, 2, 3) + +// CHECK: error: group size must be in the interval [1,16] +ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,0) + +// CHECK: error: group size must be a power of two +ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,3) + +// CHECK: error: group size must be in the interval [1,16] +ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,17) + +// CHECK: error: group size must be in the interval [1,16] +ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,32) + +// CHECK: error: group size must be in the interval [2,32] +ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,1) + +// CHECK: error: group size must be a power of two +ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,3) + +// CHECK: error: group size must be in the interval [2,32] +ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,33) + +// CHECK: error: group size must be in the interval [2,32] +ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,1,0) + +// CHECK: error: group size must be a power of two +ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,3,1) + +// CHECK: error: group size must be in the interval [2,32] +ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,33,1) + +// CHECK: error: lane id must be in the interval [0,group size - 1] +ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,2,-1) + +// CHECK: error: lane id must be in the interval [0,group size - 1] +ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,2,2) + +// CHECK: error: expected a string +ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, pppii) + +// CHECK: error: expected a 5-character mask +ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "") + +// CHECK: error: expected a 5-character mask +ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "ppii") + +// CHECK: error: expected a 5-character mask +ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "pppiii") + +// CHECK: invalid mask +ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "pppi2") diff --git a/test/MC/AMDGPU/ds.s b/test/MC/AMDGPU/ds.s index 18e4957e32d..ef36a98f746 100644 --- a/test/MC/AMDGPU/ds.s +++ b/test/MC/AMDGPU/ds.s @@ -267,10 +267,6 @@ ds_max_rtn_f32 v8, v2, v4 // SICI: ds_max_rtn_f32 v8, v2, v4 ; encoding: [0x00,0x00,0xcc,0xd8,0x02,0x04,0x00,0x08] // VI: ds_max_rtn_f32 v8, v2, v4 ; encoding: [0x00,0x00,0x66,0xd8,0x02,0x04,0x00,0x08] -ds_swizzle_b32 v8, v2 -// SICI: ds_swizzle_b32 v8, v2 ; encoding: [0x00,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08] -// VI: ds_swizzle_b32 v8, v2 ; encoding: [0x00,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08] - ds_read_b32 v8, v2 // SICI: ds_read_b32 v8, v2 ; encoding: [0x00,0x00,0xd8,0xd8,0x02,0x00,0x00,0x08] // VI: ds_read_b32 v8, v2 ; encoding: [0x00,0x00,0x6c,0xd8,0x02,0x00,0x00,0x08] @@ -506,3 +502,143 @@ ds_nop // NOSI: error: instruction not supported on this GPU // CI: ds_nop ; encoding: [0x00,0x00,0x50,0xd8,0x00,0x00,0x00,0x00] // VI: ds_nop ; encoding: [0x00,0x00,0x28,0xd8,0x00,0x00,0x00,0x00] + +//===----------------------------------------------------------------------===// +// swizzle +//===----------------------------------------------------------------------===// + +ds_swizzle_b32 v8, v2 +// SICI: ds_swizzle_b32 v8, v2 ; encoding: [0x00,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 ; encoding: [0x00,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:0xFFFF +// SICI: ds_swizzle_b32 v8, v2 offset:65535 ; encoding: [0xff,0xff,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:65535 ; encoding: [0xff,0xff,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2, 3) +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM,0,1,2,3) ; encoding: [0xe4,0x80,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM,0,1,2,3) ; encoding: [0xe4,0x80,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 2, 1, 3, 3) +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM,2,1,3,3) ; encoding: [0xf6,0x80,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM,2,1,3,3) ; encoding: [0xf6,0x80,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,1) +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,1) ; encoding: [0x1f,0x04,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,1) ; encoding: [0x1f,0x04,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,2) +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,2) ; encoding: [0x1f,0x08,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,2) ; encoding: [0x1f,0x08,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,4) +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,4) ; encoding: [0x1f,0x10,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,4) ; encoding: [0x1f,0x10,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,8) +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,8) ; encoding: [0x1f,0x20,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,8) ; encoding: [0x1f,0x20,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,16) +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,16) ; encoding: [0x1f,0x40,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,16) ; encoding: [0x1f,0x40,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,2) +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,1) ; encoding: [0x1f,0x04,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,1) ; encoding: [0x1f,0x04,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,4) +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,4) ; encoding: [0x1f,0x0c,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,4) ; encoding: [0x1f,0x0c,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,8) +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,8) ; encoding: [0x1f,0x1c,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,8) ; encoding: [0x1f,0x1c,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,16) +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,16) ; encoding: [0x1f,0x3c,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,16) ; encoding: [0x1f,0x3c,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,32) +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,32) ; encoding: [0x1f,0x7c,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,32) ; encoding: [0x1f,0x7c,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,2,1) +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,2,1) ; encoding: [0x3e,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,2,1) ; encoding: [0x3e,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,4,1) +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,4,1) ; encoding: [0x3c,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,4,1) ; encoding: [0x3c,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,8,1) +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,8,1) ; encoding: [0x38,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,8,1) ; encoding: [0x38,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,16,1) +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,16,1) ; encoding: [0x30,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,16,1) ; encoding: [0x30,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,32,1) +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,32,1) ; encoding: [0x20,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,32,1) ; encoding: [0x20,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,2,0) +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,2,0) ; encoding: [0x1e,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,2,0) ; encoding: [0x1e,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,4,3) +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,4,3) ; encoding: [0x7c,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,4,3) ; encoding: [0x7c,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,8,7) +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,8,7) ; encoding: [0xf8,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,8,7) ; encoding: [0xf8,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,16,15) +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,16,15) ; encoding: [0xf0,0x01,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,16,15) ; encoding: [0xf0,0x01,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,32,31) +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,32,31) ; encoding: [0xe0,0x03,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,32,31) ; encoding: [0xe0,0x03,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "pppii") +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,4) ; encoding: [0x1f,0x0c,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,4) ; encoding: [0x1f,0x0c,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "01pip") +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"01pip") ; encoding: [0x07,0x09,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"01pip") ; encoding: [0x07,0x09,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:0x000 +// SICI: ds_swizzle_b32 v8, v2 ; encoding: [0x00,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 ; encoding: [0x00,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:0x001 +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"0000p") ; encoding: [0x01,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"0000p") ; encoding: [0x01,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:0x020 +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,32,1) ; encoding: [0x20,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,32,1) ; encoding: [0x20,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:0x021 +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"00001") ; encoding: [0x21,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"00001") ; encoding: [0x21,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:0x400 +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"00001") ; encoding: [0x00,0x04,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"00001") ; encoding: [0x00,0x04,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:0x401 +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"0000i") ; encoding: [0x01,0x04,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"0000i") ; encoding: [0x01,0x04,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:0x420 +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"00000") ; encoding: [0x20,0x04,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"00000") ; encoding: [0x20,0x04,0x7a,0xd8,0x02,0x00,0x00,0x08] + +ds_swizzle_b32 v8, v2 offset:0x421 +// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"00000") ; encoding: [0x21,0x04,0xd4,0xd8,0x02,0x00,0x00,0x08] +// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"00000") ; encoding: [0x21,0x04,0x7a,0xd8,0x02,0x00,0x00,0x08] diff --git a/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt b/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt index 37725e960f9..5fe7a8cd062 100644 --- a/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt +++ b/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt @@ -990,23 +990,23 @@ # CHECK: ds_read_u16 v5, v1 offset:65535 gds ; encoding: [0xff,0xff,0x79,0xd8,0x01,0x00,0x00,0x05] 0xff,0xff,0x79,0xd8,0x01,0x00,0x00,0x05 -# CHECK: ds_swizzle_b32 v5, v1 offset:65535 ; encoding: [0xff,0xff,0x7a,0xd8,0x01,0x00,0x00,0x05] -0xff,0xff,0x7a,0xd8,0x01,0x00,0x00,0x05 +# CHECK: ds_swizzle_b32 v5, v1 ; encoding: [0x00,0x00,0x7a,0xd8,0x01,0x00,0x00,0x05] +0x00,0x00,0x7a,0xd8,0x01,0x00,0x00,0x05 -# CHECK: ds_swizzle_b32 v255, v1 offset:65535 ; encoding: [0xff,0xff,0x7a,0xd8,0x01,0x00,0x00,0xff] -0xff,0xff,0x7a,0xd8,0x01,0x00,0x00,0xff +# CHECK: ds_swizzle_b32 v255, v1 ; encoding: [0x00,0x00,0x7a,0xd8,0x01,0x00,0x00,0xff] +0x00,0x00,0x7a,0xd8,0x01,0x00,0x00,0xff -# CHECK: ds_swizzle_b32 v5, v255 offset:65535 ; encoding: [0xff,0xff,0x7a,0xd8,0xff,0x00,0x00,0x05] -0xff,0xff,0x7a,0xd8,0xff,0x00,0x00,0x05 +# CHECK: ds_swizzle_b32 v5, v255 ; encoding: [0x00,0x00,0x7a,0xd8,0xff,0x00,0x00,0x05] +0x00,0x00,0x7a,0xd8,0xff,0x00,0x00,0x05 # CHECK: ds_swizzle_b32 v5, v1 ; encoding: [0x00,0x00,0x7a,0xd8,0x01,0x00,0x00,0x05] 0x00,0x00,0x7a,0xd8,0x01,0x00,0x00,0x05 -# CHECK: ds_swizzle_b32 v5, v1 offset:4 ; encoding: [0x04,0x00,0x7a,0xd8,0x01,0x00,0x00,0x05] -0x04,0x00,0x7a,0xd8,0x01,0x00,0x00,0x05 +# CHECK: ds_swizzle_b32 v5, v1 ; encoding: [0x00,0x00,0x7a,0xd8,0x01,0x00,0x00,0x05] +0x00,0x00,0x7a,0xd8,0x01,0x00,0x00,0x05 -# CHECK: ds_swizzle_b32 v5, v1 offset:65535 gds ; encoding: [0xff,0xff,0x7b,0xd8,0x01,0x00,0x00,0x05] -0xff,0xff,0x7b,0xd8,0x01,0x00,0x00,0x05 +# CHECK: ds_swizzle_b32 v5, v1 gds ; encoding: [0x00,0x00,0x7b,0xd8,0x01,0x00,0x00,0x05] +0x00,0x00,0x7b,0xd8,0x01,0x00,0x00,0x05 # CHECK: ds_permute_b32 v5, v1, v2 offset:65535 ; encoding: [0xff,0xff,0x7c,0xd8,0x01,0x02,0x00,0x05] 0xff,0xff,0x7c,0xd8,0x01,0x02,0x00,0x05 -- 2.40.0