From 203bf432d6de39d15d9d4f504d6dac2909f6cb1d Mon Sep 17 00:00:00 2001 From: Dmitry Preobrazhensky Date: Mon, 4 Mar 2019 12:48:32 +0000 Subject: [PATCH] [AMDGPU][MC] Enable lds_direct operand for v_readfirstlane_b32, v_readlane_b32 and v_writelane_b32 See bug 40662: https://bugs.llvm.org/show_bug.cgi?id=40662 Reviewers: artem.tamazov, arsenm, rampitec Differential Revision: https://reviews.llvm.org/D58713 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@355312 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 117 +++++++++++------- .../Disassembler/AMDGPUDisassembler.cpp | 13 ++ .../AMDGPU/Disassembler/AMDGPUDisassembler.h | 3 + lib/Target/AMDGPU/SIRegisterInfo.td | 16 +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 2 + lib/Target/AMDGPU/VOP1Instructions.td | 2 +- lib/Target/AMDGPU/VOP2Instructions.td | 4 +- test/MC/AMDGPU/lds_direct-ci.s | 10 ++ test/MC/AMDGPU/lds_direct-err.s | 52 +++++--- test/MC/AMDGPU/lds_direct.s | 13 +- .../Disassembler/AMDGPU/lds_direct_gfx9.txt | 6 + 11 files changed, 169 insertions(+), 69 deletions(-) create mode 100644 test/MC/AMDGPU/lds_direct-ci.s diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 0dff52668c6..74f34297697 100644 --- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -415,6 +415,11 @@ public: return isSSrcF16(); } + bool isSSrcOrLdsB32() const { + return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || + isLiteralImm(MVT::i32) || isExpr(); + } + bool isVCSrcB32() const { return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); } @@ -2477,6 +2482,73 @@ bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { return true; } +static bool IsRevOpcode(const unsigned Opcode) +{ + switch (Opcode) { + case AMDGPU::V_SUBREV_F32_e32: + case AMDGPU::V_SUBREV_F32_e64: + case AMDGPU::V_SUBREV_F32_e32_si: + case AMDGPU::V_SUBREV_F32_e32_vi: + case 
AMDGPU::V_SUBREV_F32_e64_si: + case AMDGPU::V_SUBREV_F32_e64_vi: + case AMDGPU::V_SUBREV_I32_e32: + case AMDGPU::V_SUBREV_I32_e64: + case AMDGPU::V_SUBREV_I32_e32_si: + case AMDGPU::V_SUBREV_I32_e64_si: + case AMDGPU::V_SUBBREV_U32_e32: + case AMDGPU::V_SUBBREV_U32_e64: + case AMDGPU::V_SUBBREV_U32_e32_si: + case AMDGPU::V_SUBBREV_U32_e32_vi: + case AMDGPU::V_SUBBREV_U32_e64_si: + case AMDGPU::V_SUBBREV_U32_e64_vi: + case AMDGPU::V_SUBREV_U32_e32: + case AMDGPU::V_SUBREV_U32_e64: + case AMDGPU::V_SUBREV_U32_e32_gfx9: + case AMDGPU::V_SUBREV_U32_e32_vi: + case AMDGPU::V_SUBREV_U32_e64_gfx9: + case AMDGPU::V_SUBREV_U32_e64_vi: + case AMDGPU::V_SUBREV_F16_e32: + case AMDGPU::V_SUBREV_F16_e64: + case AMDGPU::V_SUBREV_F16_e32_vi: + case AMDGPU::V_SUBREV_F16_e64_vi: + case AMDGPU::V_SUBREV_U16_e32: + case AMDGPU::V_SUBREV_U16_e64: + case AMDGPU::V_SUBREV_U16_e32_vi: + case AMDGPU::V_SUBREV_U16_e64_vi: + case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: + case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: + case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: + case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: + case AMDGPU::V_LSHLREV_B32_e32_si: + case AMDGPU::V_LSHLREV_B32_e64_si: + case AMDGPU::V_LSHLREV_B16_e32_vi: + case AMDGPU::V_LSHLREV_B16_e64_vi: + case AMDGPU::V_LSHLREV_B32_e32_vi: + case AMDGPU::V_LSHLREV_B32_e64_vi: + case AMDGPU::V_LSHLREV_B64_vi: + case AMDGPU::V_LSHRREV_B32_e32_si: + case AMDGPU::V_LSHRREV_B32_e64_si: + case AMDGPU::V_LSHRREV_B16_e32_vi: + case AMDGPU::V_LSHRREV_B16_e64_vi: + case AMDGPU::V_LSHRREV_B32_e32_vi: + case AMDGPU::V_LSHRREV_B32_e64_vi: + case AMDGPU::V_LSHRREV_B64_vi: + case AMDGPU::V_ASHRREV_I32_e64_si: + case AMDGPU::V_ASHRREV_I32_e32_si: + case AMDGPU::V_ASHRREV_I16_e32_vi: + case AMDGPU::V_ASHRREV_I16_e64_vi: + case AMDGPU::V_ASHRREV_I32_e32_vi: + case AMDGPU::V_ASHRREV_I32_e64_vi: + case AMDGPU::V_ASHRREV_I64_vi: + case AMDGPU::V_PK_LSHLREV_B16_vi: + case AMDGPU::V_PK_LSHRREV_B16_vi: + case AMDGPU::V_PK_ASHRREV_I16_vi: + return true; + default: + return false; + } +} + 
bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { using namespace SIInstrFlags; @@ -2511,50 +2583,7 @@ bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { return true; // lds_direct is specified as src0. Check additional limitations. - - // FIXME: This is a workaround for bug 37943 - // which allows 64-bit VOP3 opcodes use 32-bit operands. - if (AMDGPU::getRegOperandSize(getMRI(), Desc, Src0Idx) != 4) - return false; - - // Documentation does not disable lds_direct for SDWA, but SP3 assembler does. - // FIXME: This inconsistence needs to be investigated further. - if (Desc.TSFlags & SIInstrFlags::SDWA) - return false; - - // The following opcodes do not accept lds_direct which is explicitly stated - // in AMD documentation. However SP3 disables lds_direct for most other 'rev' - // opcodes as well (e.g. for v_subrev_u32 but not for v_subrev_f32). - // FIXME: This inconsistence needs to be investigated further. - switch (Opcode) { - case AMDGPU::V_LSHLREV_B32_e32_si: - case AMDGPU::V_LSHLREV_B32_e64_si: - case AMDGPU::V_LSHLREV_B16_e32_vi: - case AMDGPU::V_LSHLREV_B16_e64_vi: - case AMDGPU::V_LSHLREV_B32_e32_vi: - case AMDGPU::V_LSHLREV_B32_e64_vi: - case AMDGPU::V_LSHLREV_B64_vi: - case AMDGPU::V_LSHRREV_B32_e32_si: - case AMDGPU::V_LSHRREV_B32_e64_si: - case AMDGPU::V_LSHRREV_B16_e32_vi: - case AMDGPU::V_LSHRREV_B16_e64_vi: - case AMDGPU::V_LSHRREV_B32_e32_vi: - case AMDGPU::V_LSHRREV_B32_e64_vi: - case AMDGPU::V_LSHRREV_B64_vi: - case AMDGPU::V_ASHRREV_I32_e64_si: - case AMDGPU::V_ASHRREV_I32_e32_si: - case AMDGPU::V_ASHRREV_I16_e32_vi: - case AMDGPU::V_ASHRREV_I16_e64_vi: - case AMDGPU::V_ASHRREV_I32_e32_vi: - case AMDGPU::V_ASHRREV_I32_e64_vi: - case AMDGPU::V_ASHRREV_I64_vi: - case AMDGPU::V_PK_LSHLREV_B16_vi: - case AMDGPU::V_PK_LSHRREV_B16_vi: - case AMDGPU::V_PK_ASHRREV_I16_vi: - return false; - default: - return true; - } + return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); } bool 
AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 9ae1bcd44ec..0acd30f5408 100644 --- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -97,6 +97,7 @@ static DecodeStatus StaticDecoderName(MCInst &Inst, \ DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass) DECODE_OPERAND_REG(VGPR_32) +DECODE_OPERAND_REG(VRegOrLds_32) DECODE_OPERAND_REG(VS_32) DECODE_OPERAND_REG(VS_64) DECODE_OPERAND_REG(VS_128) @@ -108,6 +109,7 @@ DECODE_OPERAND_REG(VReg_128) DECODE_OPERAND_REG(SReg_32) DECODE_OPERAND_REG(SReg_32_XM0_XEXEC) DECODE_OPERAND_REG(SReg_32_XEXEC_HI) +DECODE_OPERAND_REG(SRegOrLds_32) DECODE_OPERAND_REG(SReg_64) DECODE_OPERAND_REG(SReg_64_XEXEC) DECODE_OPERAND_REG(SReg_128) @@ -469,6 +471,10 @@ MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const { return createRegOperand(AMDGPU::VGPR_32RegClassID, Val); } +MCOperand AMDGPUDisassembler::decodeOperand_VRegOrLds_32(unsigned Val) const { + return decodeSrcOp(OPW32, Val); +} + MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const { return createRegOperand(AMDGPU::VReg_64RegClassID, Val); } @@ -500,6 +506,13 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI( return decodeOperand_SReg_32(Val); } +MCOperand AMDGPUDisassembler::decodeOperand_SRegOrLds_32(unsigned Val) const { + // table-gen generated disassembler doesn't care about operand types + // leaving only register class so SSrc_32 operand turns into SReg_32 + // and therefore we accept immediates and literals here as well + return decodeSrcOp(OPW32, Val); +} + MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const { return decodeSrcOp(OPW64, Val); } diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index 
4e1c75ff57b..9d7fedb97c2 100644 --- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -71,6 +71,8 @@ public: DecodeStatus convertMIMGInst(MCInst &MI) const; MCOperand decodeOperand_VGPR_32(unsigned Val) const; + MCOperand decodeOperand_VRegOrLds_32(unsigned Val) const; + MCOperand decodeOperand_VS_32(unsigned Val) const; MCOperand decodeOperand_VS_64(unsigned Val) const; MCOperand decodeOperand_VS_128(unsigned Val) const; @@ -84,6 +86,7 @@ public: MCOperand decodeOperand_SReg_32(unsigned Val) const; MCOperand decodeOperand_SReg_32_XM0_XEXEC(unsigned Val) const; MCOperand decodeOperand_SReg_32_XEXEC_HI(unsigned Val) const; + MCOperand decodeOperand_SRegOrLds_32(unsigned Val) const; MCOperand decodeOperand_SReg_64(unsigned Val) const; MCOperand decodeOperand_SReg_64_XEXEC(unsigned Val) const; MCOperand decodeOperand_SReg_128(unsigned Val) const; diff --git a/lib/Target/AMDGPU/SIRegisterInfo.td b/lib/Target/AMDGPU/SIRegisterInfo.td index 84751d1cbfe..5f04a8c816c 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/lib/Target/AMDGPU/SIRegisterInfo.td @@ -442,6 +442,11 @@ def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, let AllocationPriority = 7; } +def SRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, + (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI, LDS_DIRECT_CLASS)> { + let isAllocatable = 0; +} + def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32, (add SGPR_64Regs)> { let CopyCost = 1; let AllocationPriority = 8; @@ -511,6 +516,11 @@ def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, let AllocationPriority = 12; } +def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, + (add VGPR_32, LDS_DIRECT_CLASS)> { + let isAllocatable = 0; +} + // Register class for all vector registers (VGPRs + Interploation Registers) def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, 
v2i32, v2f32, v4f16, v4i16], 32, (add VGPR_64)> { let Size = 64; @@ -631,6 +641,12 @@ multiclass RegInlineOperand defm SSrc : RegImmOperand<"SReg", "SSrc">; +def SSrcOrLds_b32 : RegisterOperand { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_REG_IMM_INT32"; + let ParserMatchClass = RegImmMatcher<"SSrcOrLdsB32">; +} + //===----------------------------------------------------------------------===// // SCSrc_* Operands with an SGPR or a inline constant //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index e1727338599..96128749081 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -802,9 +802,11 @@ unsigned getRegBitWidth(unsigned RCID) { switch (RCID) { case AMDGPU::SGPR_32RegClassID: case AMDGPU::VGPR_32RegClassID: + case AMDGPU::VRegOrLds_32RegClassID: case AMDGPU::VS_32RegClassID: case AMDGPU::SReg_32RegClassID: case AMDGPU::SReg_32_XM0RegClassID: + case AMDGPU::SRegOrLds_32RegClassID: return 32; case AMDGPU::SGPR_64RegClassID: case AMDGPU::VS_64RegClassID: diff --git a/lib/Target/AMDGPU/VOP1Instructions.td b/lib/Target/AMDGPU/VOP1Instructions.td index 15006155d9b..aafe0339acf 100644 --- a/lib/Target/AMDGPU/VOP1Instructions.td +++ b/lib/Target/AMDGPU/VOP1Instructions.td @@ -142,7 +142,7 @@ defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>; // TODO: Make profile for this, there is VOP3 encoding also def V_READFIRSTLANE_B32 : InstSI <(outs SReg_32:$vdst), - (ins VGPR_32:$src0), + (ins VRegOrLds_32:$src0), "v_readfirstlane_b32 $vdst, $src0", [(set i32:$vdst, (int_amdgcn_readfirstlane i32:$src0))]>, Enc32 { diff --git a/lib/Target/AMDGPU/VOP2Instructions.td b/lib/Target/AMDGPU/VOP2Instructions.td index ee165515ce7..9a0a81c97ef 100644 --- a/lib/Target/AMDGPU/VOP2Instructions.td +++ b/lib/Target/AMDGPU/VOP2Instructions.td @@ -360,7 +360,7 @@ def 
VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { def VOP_READLANE : VOPProfile<[i32, i32, i32]> { let Outs32 = (outs SReg_32:$vdst); let Outs64 = Outs32; - let Ins32 = (ins VGPR_32:$src0, SCSrc_b32:$src1); + let Ins32 = (ins VRegOrLds_32:$src0, SCSrc_b32:$src1); let Ins64 = Ins32; let Asm32 = " $vdst, $src0, $src1"; let Asm64 = Asm32; @@ -765,7 +765,7 @@ defm V_SUBBREV_U32 : VOP2be_Real_e32e64_si <0x2a>; defm V_READLANE_B32 : VOP2_Real_si <0x01>; -let InOperandList = (ins SSrc_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in) in { +let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in) in { defm V_WRITELANE_B32 : VOP2_Real_si <0x02>; } diff --git a/test/MC/AMDGPU/lds_direct-ci.s b/test/MC/AMDGPU/lds_direct-ci.s new file mode 100644 index 00000000000..c8d3955042b --- /dev/null +++ b/test/MC/AMDGPU/lds_direct-ci.s @@ -0,0 +1,10 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=CI + +v_readfirstlane_b32 s0, lds_direct +// CI: v_readfirstlane_b32 s0, src_lds_direct ; encoding: [0xfe,0x04,0x00,0x7e] + +v_readlane_b32 s0, lds_direct, s0 +// CI: v_readlane_b32 s0, src_lds_direct, s0 ; encoding: [0xfe,0x00,0x00,0x02] + +v_writelane_b32 v0, lds_direct, s0 +// CI: v_writelane_b32 v0, src_lds_direct, s0 ; encoding: [0xfe,0x00,0x00,0x04] diff --git a/test/MC/AMDGPU/lds_direct-err.s b/test/MC/AMDGPU/lds_direct-err.s index 25a5cda6a54..578461bc35a 100644 --- a/test/MC/AMDGPU/lds_direct-err.s +++ b/test/MC/AMDGPU/lds_direct-err.s @@ -5,55 +5,77 @@ //---------------------------------------------------------------------------// s_and_b32 s2, lds_direct, s1 -// NOGFX9: error +// NOGFX9: error: invalid operand for instruction //---------------------------------------------------------------------------// -// lds_direct may not be used with V_{LSHL,LSHR,ASHL}REV opcodes +// lds_direct may not be used with "REV" opcodes //---------------------------------------------------------------------------// 
v_ashrrev_i16 v0, lds_direct, v0 -// NOGFX9: error +// NOGFX9: error: invalid use of lds_direct v_ashrrev_i32 v0, lds_direct, v0 -// NOGFX9: error +// NOGFX9: error: invalid use of lds_direct v_lshlrev_b16 v0, lds_direct, v0 -// NOGFX9: error +// NOGFX9: error: invalid use of lds_direct v_lshlrev_b32 v0, lds_direct, v0 -// NOGFX9: error +// NOGFX9: error: invalid use of lds_direct v_lshrrev_b16 v0, lds_direct, v0 -// NOGFX9: error +// NOGFX9: error: invalid use of lds_direct v_lshrrev_b32 v0, lds_direct, v0 -// NOGFX9: error +// NOGFX9: error: invalid use of lds_direct v_pk_ashrrev_i16 v0, lds_direct, v0 -// NOGFX9: error +// NOGFX9: error: invalid use of lds_direct v_pk_lshlrev_b16 v0, lds_direct, v0 -// NOGFX9: error +// NOGFX9: error: invalid use of lds_direct v_pk_lshrrev_b16 v0, lds_direct, v0 -// NOGFX9: error +// NOGFX9: error: invalid use of lds_direct + +v_subbrev_co_u32 v0, vcc, src_lds_direct, v0, vcc +// NOGFX9: error: invalid use of lds_direct + +v_subrev_co_u32 v0, vcc, src_lds_direct, v0 +// NOGFX9: error: invalid use of lds_direct + +v_subrev_f16 v0, src_lds_direct, v0 +// NOGFX9: error: invalid use of lds_direct + +v_subrev_u16 v0, src_lds_direct, v0 +// NOGFX9: error: invalid use of lds_direct + +v_subrev_u32 v0, src_lds_direct, v0 +// NOGFX9: error: invalid use of lds_direct + +//---------------------------------------------------------------------------// +// lds_direct may not be used with v_writelane_b32 for VI/GFX9 +//---------------------------------------------------------------------------// + +v_writelane_b32 v0, lds_direct, s0 +// NOGFX9: error: instruction not supported on this GPU //---------------------------------------------------------------------------// // lds_direct cannot be used with 64-bit and larger operands //---------------------------------------------------------------------------// v_add_f64 v[0:1], lds_direct, v[0:1] -// NOGFX9: error +// NOGFX9: error: invalid operand for instruction 
//---------------------------------------------------------------------------// // Only SRC0 may specify lds_direct //---------------------------------------------------------------------------// v_add_i32 v0, v0, lds_direct -// NOGFX9: error +// NOGFX9: error: invalid use of lds_direct v_add_i32 lds_direct, v0, v0 -// NOGFX9: error +// NOGFX9: error: invalid operand for instruction v_fma_f32 v0, v0, v0, lds_direct -// NOGFX9: error +// NOGFX9: error: invalid use of lds_direct diff --git a/test/MC/AMDGPU/lds_direct.s b/test/MC/AMDGPU/lds_direct.s index 33f631e79b0..6a879ee03d1 100644 --- a/test/MC/AMDGPU/lds_direct.s +++ b/test/MC/AMDGPU/lds_direct.s @@ -31,6 +31,9 @@ v_fract_f32_e64 v0, src_lds_direct v_cvt_f16_u16 v0, src_lds_direct // GFX9: v_cvt_f16_u16_e32 v0, src_lds_direct ; encoding: [0xfe,0x72,0x00,0x7e] +v_readfirstlane_b32 s0, src_lds_direct +// GFX9: v_readfirstlane_b32 s0, src_lds_direct ; encoding: [0xfe,0x04,0x00,0x7e] + //---------------------------------------------------------------------------// // VOP2/3 //---------------------------------------------------------------------------// @@ -78,6 +81,9 @@ v_min3_i16 v0, src_lds_direct, v0, v0 v_max3_f16 v0, src_lds_direct, v0, v0 // GFX9: v_max3_f16 v0, src_lds_direct, v0, v0 ; encoding: [0x00,0x00,0xf7,0xd1,0xfe,0x00,0x02,0x04] +v_readlane_b32 s0, src_lds_direct, s0 +// GFX9: v_readlane_b32 s0, src_lds_direct, s0 ; encoding: [0x00,0x00,0x89,0xd2,0xfe,0x00,0x00,0x00] + //---------------------------------------------------------------------------// // VOP3P //---------------------------------------------------------------------------// @@ -107,10 +113,3 @@ v_cmpx_neq_f32 vcc, src_lds_direct, v0 v_cmp_lt_f16 vcc, lds_direct, v0 // GFX9: v_cmp_lt_f16_e32 vcc, src_lds_direct, v0 ; encoding: [0xfe,0x00,0x42,0x7c] - -//---------------------------------------------------------------------------// -// FIXME: enable lds_direct for the following opcodes and add tests 
-//---------------------------------------------------------------------------// - -//v_readfirstlane_b32 s0, src_lds_direct -//v_readlane_b32 s0, src_lds_direct, s0 diff --git a/test/MC/Disassembler/AMDGPU/lds_direct_gfx9.txt b/test/MC/Disassembler/AMDGPU/lds_direct_gfx9.txt index 3480d6f2a68..e29c4d2f62b 100644 --- a/test/MC/Disassembler/AMDGPU/lds_direct_gfx9.txt +++ b/test/MC/Disassembler/AMDGPU/lds_direct_gfx9.txt @@ -17,3 +17,9 @@ # GFX9: v_cmpx_le_i32_e32 vcc, src_lds_direct, v0 ; encoding: [0xfe,0x00,0xa6,0x7d] 0xfe,0x00,0xa6,0x7d + +# GFX9: v_readlane_b32 s0, src_lds_direct, s0 ; encoding: [0x00,0x00,0x89,0xd2,0xfe,0x00,0x00,0x00] +0x00,0x00,0x89,0xd2,0xfe,0x00,0x00,0x00 + +# GFX9: v_readfirstlane_b32 s0, src_lds_direct ; encoding: [0xfe,0x04,0x00,0x7e] +0xfe,0x04,0x00,0x7e -- 2.50.1