[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
[IntrNoMem, IntrConvergent]>;
+// llvm.amdgcn.mov.dpp8.i32 <src> <sel>
+// <sel> is a 32-bit constant that selects the lanes to read from; its high
+// 8 bits must be zero.
+def int_amdgcn_mov_dpp8 :
+ Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, llvm_i32_ty],
+ [IntrNoMem, IntrConvergent]>;
+
def int_amdgcn_s_get_waveid_in_workgroup :
GCCBuiltin<"__builtin_amdgcn_s_get_waveid_in_workgroup">,
Intrinsic<[llvm_i32_ty], [], [IntrReadMem]>;
"Support DPP (Data Parallel Primitives) extension"
>;
+// DPP8 allows arbitrary cross-lane swizzling within groups of 8 lanes.
+def FeatureDPP8 : SubtargetFeature<"dpp8",
+ "HasDPP8",
+ "true",
+ "Support DPP8 (Data Parallel Primitives) extension"
+>;
+
def FeatureR128A16 : SubtargetFeature<"r128-a16",
"HasR128A16",
"true",
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
FeatureAddNoCarryInsts, FeatureFmaMixInsts, FeatureGFX8Insts,
FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking,
- FeatureVOP3Literal, FeatureNoDataDepHazard,
+ FeatureVOP3Literal, FeatureDPP8, FeatureNoDataDepHazard,
FeatureDoesNotSupportSRAMECC
]
>;
def HasDPP : Predicate<"Subtarget->hasDPP()">,
AssemblerPredicate<"FeatureGCN3Encoding,FeatureDPP">;
+def HasDPP8 : Predicate<"Subtarget->hasDPP8()">, // codegen side: queries the subtarget's hasDPP8()
+ AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureDPP8">; // assembler side: DPP8 feature with GFX10 insts and without GCN3 encoding
+
def HasR128A16 : Predicate<"Subtarget->hasR128A16()">,
AssemblerPredicate<"FeatureR128A16">;
+def HasDPP16 : Predicate<"Subtarget->hasDPP()">, // codegen side reuses the plain hasDPP() query
+ AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureDPP">; // assembler side: DPP feature with GFX10 insts and without GCN3 encoding
+
def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">,
AssemblerPredicate<"FeatureIntClamp">;
HasSDWAMac(false),
HasSDWAOutModsVOPC(false),
HasDPP(false),
+ HasDPP8(false),
HasR128A16(false),
HasNSAEncoding(false),
HasDLInsts(false),
bool HasSDWAMac;
bool HasSDWAOutModsVOPC;
bool HasDPP;
+ bool HasDPP8;
bool HasR128A16;
bool HasNSAEncoding;
bool HasDLInsts;
return HasDPP;
}
+ /// Returns the HasDPP8 subtarget flag.
+ bool hasDPP8() const { return HasDPP8; }
+
bool hasR128A16() const {
return HasR128A16;
}
ImmTyD16,
ImmTyClampSI,
ImmTyOModSI,
+ ImmTyDPP8,
ImmTyDppCtrl,
ImmTyDppRowMask,
ImmTyDppBankMask,
ImmTyDppBoundCtrl,
+ ImmTyDppFi,
ImmTySdwaDstSel,
ImmTySdwaSrc0Sel,
ImmTySdwaSrc1Sel,
bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
+ bool isFI() const { return isImmTy(ImmTyDppFi); }
bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
bool isSMRDOffset8() const;
bool isSMRDOffset20() const;
bool isSMRDLiteralOffset() const;
+ bool isDPP8() const;
bool isDPPCtrl() const;
bool isGPRIdxMode() const;
bool isS16Imm() const;
case ImmTyFORMAT: OS << "FORMAT"; break;
case ImmTyClampSI: OS << "ClampSI"; break;
case ImmTyOModSI: OS << "OModSI"; break;
+ case ImmTyDPP8: OS << "DPP8"; break;
case ImmTyDppCtrl: OS << "DppCtrl"; break;
case ImmTyDppRowMask: OS << "DppRowMask"; break;
case ImmTyDppBankMask: OS << "DppBankMask"; break;
case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
+ case ImmTyDppFi: OS << "FI"; break;
case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
OperandMatchResultTy parseDim(OperandVector &Operands);
+ OperandMatchResultTy parseDPP8(OperandVector &Operands);
OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
AMDGPUOperand::Ptr defaultRowMask() const;
AMDGPUOperand::Ptr defaultBankMask() const;
AMDGPUOperand::Ptr defaultBoundCtrl() const;
- void cvtDPP(MCInst &Inst, const OperandVector &Operands);
+ AMDGPUOperand::Ptr defaultFI() const;
+ void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
+ void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
AMDGPUOperand::ImmTy Type);
{"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
{"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
{"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
+ {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr},
{"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
{"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
{"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
// dpp
//===----------------------------------------------------------------------===//
+// True when this operand is an immediate tagged ImmTyDPP8.
+bool AMDGPUOperand::isDPP8() const { return isImmTy(ImmTyDPP8); }
+
bool AMDGPUOperand::isDPPCtrl() const {
using namespace AMDGPU::DPP;
(Imm == DppCtrl::ROW_MIRROR) ||
(Imm == DppCtrl::ROW_HALF_MIRROR) ||
(Imm == DppCtrl::BCAST15) ||
- (Imm == DppCtrl::BCAST31);
+ (Imm == DppCtrl::BCAST31) ||
+ (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
+ (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
}
return false;
}
return MatchOperand_Success;
}
+// Parses a DPP8 lane-select operand of the form
+//   dpp8:[s0,s1,s2,s3,s4,s5,s6,s7]
+// where each selector is an absolute expression in the range [0, 7]. The
+// eight selectors are packed three bits apiece (s0 in the lowest bits) into
+// one ImmTyDPP8 immediate that is appended to Operands.
+//
+// Returns NoMatch when the current token is not an identifier or the target
+// is not GFX10, forwards any identifier other than "dpp8" to parseDPPCtrl,
+// and returns ParseFail on malformed syntax or an out-of-range selector.
+OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
+  const SMLoc StartLoc = Parser.getTok().getLoc();
+
+  if (getLexer().getKind() != AsmToken::Identifier)
+    return MatchOperand_NoMatch;
+
+  const StringRef Keyword = Parser.getTok().getString();
+  if (Keyword != "dpp8")
+    return parseDPPCtrl(Operands);
+  if (!isGFX10())
+    return MatchOperand_NoMatch;
+
+  // Consume "dpp8" and require the ":[" that opens the selector list.
+  Parser.Lex();
+  if (getLexer().isNot(AsmToken::Colon))
+    return MatchOperand_ParseFail;
+
+  Parser.Lex();
+  if (getLexer().isNot(AsmToken::LBrac))
+    return MatchOperand_ParseFail;
+
+  Parser.Lex();
+
+  unsigned Packed = 0;
+  for (size_t Lane = 0; Lane < 8; ++Lane) {
+    // Every selector after the first must be preceded by a comma.
+    if (Lane != 0) {
+      if (getLexer().isNot(AsmToken::Comma))
+        return MatchOperand_ParseFail;
+      Parser.Lex();
+    }
+
+    int64_t Sel;
+    if (getParser().parseAbsoluteExpression(Sel))
+      return MatchOperand_ParseFail;
+    if (Sel < 0 || Sel > 7)
+      return MatchOperand_ParseFail;
+
+    Packed |= (Sel << (Lane * 3));
+  }
+
+  if (getLexer().isNot(AsmToken::RBrac))
+    return MatchOperand_ParseFail;
+  Parser.Lex();
+
+  Operands.push_back(AMDGPUOperand::CreateImm(this, Packed, StartLoc, AMDGPUOperand::ImmTyDPP8));
+  return MatchOperand_Success;
+}
+
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
using namespace AMDGPU::DPP;
&& Prefix != "wave_rol"
&& Prefix != "wave_shr"
&& Prefix != "wave_ror"
- && Prefix != "row_bcast") {
+ && Prefix != "row_bcast"
+ && Prefix != "row_share"
+ && Prefix != "row_xmask") {
return MatchOperand_NoMatch;
}
+ if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
+ return MatchOperand_NoMatch;
+
+ if (!isVI() && !isGFX9() &&
+ (Prefix == "wave_shl" || Prefix == "wave_shr" ||
+ Prefix == "wave_rol" || Prefix == "wave_ror" ||
+ Prefix == "row_bcast"))
+ return MatchOperand_NoMatch;
+
Parser.Lex();
if (getLexer().isNot(AsmToken::Colon))
return MatchOperand_ParseFail;
} else {
return MatchOperand_ParseFail;
}
+ } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
+ Int |= DppCtrl::ROW_SHARE_FIRST;
+ } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
+ Int |= DppCtrl::ROW_XMASK_FIRST;
} else {
return MatchOperand_ParseFail;
}
return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}
-void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
+// Creates an ImmTyDppFi immediate operand with value 0 and a
+// default-constructed source location.
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
+  const SMLoc NoLoc;
+  return AMDGPUOperand::CreateImm(this, 0, NoLoc, AMDGPUOperand::ImmTyDppFi);
+}
+
+void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
OptionalImmIndexMap OptionalIdx;
unsigned I = 1;
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
}
+ int Fi = 0;
for (unsigned E = Operands.size(); I != E; ++I) {
auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
MCOI::TIED_TO);
// Skip it.
continue;
}
- if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
- Op.addRegWithFPInputModsOperands(Inst, 2);
- } else if (Op.isDPPCtrl()) {
- Op.addImmOperands(Inst, 1);
- } else if (Op.isImm()) {
- // Handle optional arguments
- OptionalIdx[Op.getImmTy()] = I;
+
+ if (IsDPP8) {
+ if (Op.isDPP8()) {
+ Op.addImmOperands(Inst, 1);
+ } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+ Op.addRegWithFPInputModsOperands(Inst, 2);
+ } else if (Op.isFI()) {
+ Fi = Op.getImm();
+ } else if (Op.isReg()) {
+ Op.addRegOperands(Inst, 1);
+ } else {
+ llvm_unreachable("Invalid operand type");
+ }
} else {
- llvm_unreachable("Invalid operand type");
+ if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+ Op.addRegWithFPInputModsOperands(Inst, 2);
+ } else if (Op.isDPPCtrl()) {
+ Op.addImmOperands(Inst, 1);
+ } else if (Op.isImm()) {
+ // Handle optional arguments
+ OptionalIdx[Op.getImmTy()] = I;
+ } else {
+ llvm_unreachable("Invalid operand type");
+ }
}
}
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
+ if (IsDPP8) {
+ using namespace llvm::AMDGPU::DPP;
+ Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
+ } else {
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
+ if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
+ }
+ }
}
//===----------------------------------------------------------------------===//
return MCDisassembler::Fail;
}
+// Returns true when MI's "fi" operand exists in the operand list and holds
+// DPP8_FI_0 or DPP8_FI_1. The opcode must have an "fi" named operand
+// (asserted).
+static bool isValidDPP8(const MCInst &MI) {
+  using namespace llvm::AMDGPU::DPP;
+  const int FiIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::fi);
+  assert(FiIdx != -1);
+  // Reject instructions whose operand list does not reach the fi slot.
+  if (static_cast<unsigned>(FiIdx) >= MI.getNumOperands())
+    return false;
+  const unsigned FiVal = MI.getOperand(FiIdx).getImm();
+  switch (FiVal) {
+  case DPP8_FI_0:
+  case DPP8_FI_1:
+    return true;
+  default:
+    return false;
+  }
+}
+
DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes_,
uint64_t Address,
// encodings
if (Bytes.size() >= 8) {
const uint64_t QW = eatBytes<uint64_t>(Bytes);
+
+ Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address);
+ if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
+ break;
+
+ MI = MCInst(); // clear
+
Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address);
if (Res) break;
return MCDisassembler::Success;
}
+// Post-processes an instruction decoded from the DPP8 table. While MI has
+// fewer operands than its descriptor, a zero src0_modifiers and then a zero
+// src1_modifiers operand are inserted for opcodes that name them. Returns
+// Success if isValidDPP8 accepts the result, SoftFail otherwise.
+DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
+  const unsigned Opcode = MI.getOpcode();
+  const unsigned ExpectedNumOps = MCII->get(Opcode).getNumOperands();
+
+  // Insert dummy unused src modifiers.
+  const bool NeedsSrc0Mods =
+      MI.getNumOperands() < ExpectedNumOps &&
+      AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0_modifiers) != -1;
+  if (NeedsSrc0Mods)
+    insertNamedMCOperand(MI, MCOperand::createImm(0),
+                         AMDGPU::OpName::src0_modifiers);
+
+  // The operand count is re-read here because the insertion above changes it.
+  const bool NeedsSrc1Mods =
+      MI.getNumOperands() < ExpectedNumOps &&
+      AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1_modifiers) != -1;
+  if (NeedsSrc1Mods)
+    insertNamedMCOperand(MI, MCOperand::createImm(0),
+                         AMDGPU::OpName::src1_modifiers);
+
+  if (isValidDPP8(MI))
+    return MCDisassembler::Success;
+  return MCDisassembler::SoftFail;
+}
+
// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show address as if it
// has 1 dword, which could be not really so.
uint64_t Address) const;
DecodeStatus convertSDWAInst(MCInst &MI) const;
+ DecodeStatus convertDPP8Inst(MCInst &MI) const;
DecodeStatus convertMIMGInst(MCInst &MI) const;
MCOperand decodeOperand_VGPR_32(unsigned Val) const;
bool isVI() const;
bool isGFX9() const;
bool isGFX10() const;
- };
+};
//===----------------------------------------------------------------------===//
// AMDGPUSymbolizer
case AMDGPU::V_ADD_CO_CI_U32_sdwa_gfx10:
case AMDGPU::V_SUB_CO_CI_U32_sdwa_gfx10:
case AMDGPU::V_SUBREV_CO_CI_U32_sdwa_gfx10:
+ case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx10:
+ case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx10:
+ case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx10:
+ case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10:
+ case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10:
+ case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10:
printDefaultVccOperand(1, STI, O);
break;
}
case AMDGPU::V_ADD_CO_CI_U32_e32_gfx10:
case AMDGPU::V_SUB_CO_CI_U32_e32_gfx10:
case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
+ case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx10:
+ case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx10:
+ case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx10:
+ case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10:
+ case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10:
+ case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10:
case AMDGPU::V_CNDMASK_B32_e32_gfx6_gfx7:
case AMDGPU::V_CNDMASK_B32_e32_vi:
}
}
+// Prints a DPP8 selector immediate as " dpp8:[s0,s1,...,s7]", decoding eight
+// 3-bit fields starting from the least significant bits. Reaching this on a
+// non-GFX10 subtarget is treated as unreachable.
+void AMDGPUInstPrinter::printDPP8(const MCInst *MI, unsigned OpNo,
+                                  const MCSubtargetInfo &STI,
+                                  raw_ostream &O) {
+  if (!AMDGPU::isGFX10(STI))
+    llvm_unreachable("dpp8 is not supported on ASICs earlier than GFX10");
+
+  const unsigned Packed = MI->getOperand(OpNo).getImm();
+  O << " dpp8:[";
+  for (size_t Lane = 0; Lane < 8; ++Lane) {
+    if (Lane != 0)
+      O << ',';
+    O << formatDec((Packed >> (3 * Lane)) & 0x7);
+  }
+  O << ']';
+}
+
void AMDGPUInstPrinter::printDPPCtrl(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
O << " row_ror:";
printU4ImmDecOperand(MI, OpNo, O);
} else if (Imm == DppCtrl::WAVE_SHL1) {
+ if (!AMDGPU::isVI(STI) && !AMDGPU::isGFX9(STI)) {
+ O << " /* wave_shl is not supported starting from GFX10 */";
+ return;
+ }
O << " wave_shl:1";
} else if (Imm == DppCtrl::WAVE_ROL1) {
+ if (!AMDGPU::isVI(STI) && !AMDGPU::isGFX9(STI)) {
+ O << " /* wave_rol is not supported starting from GFX10 */";
+ return;
+ }
O << " wave_rol:1";
} else if (Imm == DppCtrl::WAVE_SHR1) {
+ if (!AMDGPU::isVI(STI) && !AMDGPU::isGFX9(STI)) {
+ O << " /* wave_shr is not supported starting from GFX10 */";
+ return;
+ }
O << " wave_shr:1";
} else if (Imm == DppCtrl::WAVE_ROR1) {
+ if (!AMDGPU::isVI(STI) && !AMDGPU::isGFX9(STI)) {
+ O << " /* wave_ror is not supported starting from GFX10 */";
+ return;
+ }
O << " wave_ror:1";
} else if (Imm == DppCtrl::ROW_MIRROR) {
O << " row_mirror";
} else if (Imm == DppCtrl::ROW_HALF_MIRROR) {
O << " row_half_mirror";
} else if (Imm == DppCtrl::BCAST15) {
+ if (!AMDGPU::isVI(STI) && !AMDGPU::isGFX9(STI)) {
+ O << " /* row_bcast is not supported starting from GFX10 */";
+ return;
+ }
O << " row_bcast:15";
} else if (Imm == DppCtrl::BCAST31) {
+ if (!AMDGPU::isVI(STI) && !AMDGPU::isGFX9(STI)) {
+ O << " /* row_bcast is not supported starting from GFX10 */";
+ return;
+ }
O << " row_bcast:31";
+ } else if ((Imm >= DppCtrl::ROW_SHARE_FIRST) &&
+ (Imm <= DppCtrl::ROW_SHARE_LAST)) {
+ if (!AMDGPU::isGFX10(STI)) {
+ O << " /* row_share is not supported on ASICs earlier than GFX10 */";
+ return;
+ }
+ O << " row_share:";
+ printU4ImmDecOperand(MI, OpNo, O);
+ } else if ((Imm >= DppCtrl::ROW_XMASK_FIRST) &&
+ (Imm <= DppCtrl::ROW_XMASK_LAST)) {
+ if (!AMDGPU::isGFX10(STI)) {
+ O << " /* row_xmask is not supported on ASICs earlier than GFX10 */";
+ return;
+ }
+ // Leading space keeps this consistent with the other dpp_ctrl spellings (e.g. " row_share:").
+ O << " row_xmask:";
+ printU4ImmDecOperand(MI, OpNo, O);
} else {
O << " /* Invalid dpp_ctrl value */";
}
}
}
+// Emits " fi:1" when the operand equals DPP_FI_1 or DPP8_FI_1; prints
+// nothing for any other value.
+void AMDGPUInstPrinter::printFI(const MCInst *MI, unsigned OpNo,
+                                const MCSubtargetInfo &STI,
+                                raw_ostream &O) {
+  using namespace llvm::AMDGPU::DPP;
+  const unsigned FiVal = MI->getOperand(OpNo).getImm();
+  const bool FiEnabled = (FiVal == DPP_FI_1) || (FiVal == DPP8_FI_1);
+  if (!FiEnabled)
+    return;
+  O << " fi:1";
+}
+
void AMDGPUInstPrinter::printSDWASel(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
using namespace llvm::AMDGPU::SDWA;
const MCSubtargetInfo &STI, raw_ostream &O);
void printOperandAndIntInputMods(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printDPP8(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printDPPCtrl(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printRowMask(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
const MCSubtargetInfo &STI, raw_ostream &O);
void printBoundCtrl(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printFI(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printSDWASel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printSDWADstSel(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
ROW_HALF_MIRROR = 0x141,
BCAST15 = 0x142,
BCAST31 = 0x143,
- DPP_LAST = BCAST31
+ DPP_UNUSED8_FIRST = 0x144,
+ DPP_UNUSED8_LAST = 0x14F,
+ ROW_SHARE_FIRST = 0x150,
+ ROW_SHARE_LAST = 0x15F,
+ ROW_XMASK_FIRST = 0x160,
+ ROW_XMASK_LAST = 0x16F,
+ DPP_LAST = ROW_XMASK_LAST
+};
+
+enum DppFiMode { // Values carried by the "fi" operand of DPP instructions
+ DPP_FI_0 = 0, // fi disabled; presumably the non-DPP8 encoding -- TODO confirm
+ DPP_FI_1 = 1, // fi enabled; printed as " fi:1"
+ DPP8_FI_0 = 0xE9, // emitted for DPP8 instructions when the parsed fi is zero
+ DPP8_FI_1 = 0xEA, // emitted for DPP8 instructions when the parsed fi is nonzero; printed as " fi:1"
 };
} // namespace DPP
(DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
(DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
(DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
- (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST)) {
+ (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
+ (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
ErrInfo = "Invalid dpp_ctrl value";
return false;
}
+ if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
+ ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
+ ErrInfo = "Invalid dpp_ctrl value: "
+ "wavefront shifts are not supported on GFX10+";
+ return false;
+ }
+ // Row broadcasts (BCAST15..BCAST31) are rejected on GFX10 and later.
+ if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
+ ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
+ ErrInfo = "Invalid dpp_ctrl value: "
+ "broadcasts are not supported on GFX10+";
+ return false;
+ }
+ if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
+ ST.getGeneration() < AMDGPUSubtarget::GFX10) {
+ ErrInfo = "Invalid dpp_ctrl value: "
+ "row_share and row_xmask are not supported before GFX10";
+ return false;
+ }
}
return true;
def DMask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
def Dim : NamedOperandU8<"Dim", NamedMatchClass<"Dim", 0>>;
+def dpp8 : NamedOperandU32<"DPP8", NamedMatchClass<"DPP8", 0>>;
+
def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>;
def row_mask : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>;
def bank_mask : NamedOperandU32<"BankMask", NamedMatchClass<"BankMask">>;
def bound_ctrl : NamedOperandBit<"BoundCtrl", NamedMatchClass<"BoundCtrl">>;
+def FI : NamedOperandU32<"FI", NamedMatchClass<"FI">>;
def dst_sel : NamedOperandU32<"SDWADstSel", NamedMatchClass<"SDWADstSel">>;
def src0_sel : NamedOperandU32<"SDWASrc0Sel", NamedMatchClass<"SDWASrc0Sel">>;
/* endif */)));
}
+class getInsDPP16 <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
+ int NumSrcArgs, bit HasModifiers,
+ Operand Src0Mod, Operand Src1Mod> { // DPP16 input dag: the getInsDPP operand list with FI:$fi appended
+ dag ret = !con(getInsDPP<DstRC, Src0RC, Src1RC, NumSrcArgs,
+ HasModifiers, Src0Mod, Src1Mod>.ret,
+ (ins FI:$fi));
+}
+
+class getInsDPP8 <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
+ int NumSrcArgs, bit HasModifiers,
+ Operand Src0Mod, Operand Src1Mod> { // DPP8 input dag, chosen by source count and modifier presence; every variant ends in dpp8:$dpp8, FI:$fi
+ dag ret = !if (!eq(NumSrcArgs, 0),
+ // No source operands (V_NOP): only the DPP8 selector and fi
+ (ins dpp8:$dpp8, FI:$fi),
+ !if (!eq(NumSrcArgs, 1),
+ !if (!eq(HasModifiers, 1),
+ // One source (VOP1) with source modifiers
+ (ins DstRC:$old, Src0Mod:$src0_modifiers,
+ Src0RC:$src0, dpp8:$dpp8, FI:$fi)
+ /* else */,
+ // One source (VOP1) without source modifiers
+ (ins DstRC:$old, Src0RC:$src0, dpp8:$dpp8, FI:$fi)
+ /* endif */)
+ /* NumSrcArgs == 2 */,
+ !if (!eq(HasModifiers, 1),
+ // Two sources (VOP2) with source modifiers
+ (ins DstRC:$old,
+ Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ dpp8:$dpp8, FI:$fi)
+ /* else */,
+ // Two sources (VOP2) without source modifiers
+ (ins DstRC:$old,
+ Src0RC:$src0, Src1RC:$src1, dpp8:$dpp8, FI:$fi)
+ /* endif */)));
+}
// Ins for SDWA
string ret = dst#args#" $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
}
+class getAsmDPP16 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> { // DPP16 asm string: the getAsmDPP string followed by "$fi"
+ string ret = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret#"$fi";
+}
+
+class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> { // DPP8 asm operand string: dst, sources, then "$dpp8$fi"
+ string dst = !if(HasDst,
+ !if(!eq(DstVT.Size, 1),
+ "$sdst",
+ "$vdst"),
+ ""); // 1-bit results (VOPC) use $sdst, others $vdst; empty when there is no dst
+ string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,"); // comma only when NumSrcArgs is not 1
+ string src1 = !if(!eq(NumSrcArgs, 1), "",
+ !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
+ " $src1_modifiers,"));
+ string args = !if(!eq(HasModifiers, 0),
+ getAsm32<0, NumSrcArgs, DstVT>.ret, // without modifiers, take the source list from getAsm32
+ ", "#src0#src1);
+ string ret = dst#args#"$dpp8$fi"; // no separator is inserted before $dpp8 or $fi here
+}
+
class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
string dst = !if(HasDst,
!if(!eq(DstVT.Size, 1),
field dag Outs32 = Outs;
field dag Outs64 = Outs;
field dag OutsDPP = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
+ field dag OutsDPP8 = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret;
field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
getInsDPP<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
HasModifiers, Src0ModDPP, Src1ModDPP>.ret,
(ins));
+ field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
+ HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
+ field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs, 0,
+ Src0ModDPP, Src1ModDPP>.ret;
field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
DstVT>.ret;
HasSrc2FloatMods>.ret;
field string AsmDPP = !if(HasExtDPP,
getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret, "");
+ field string AsmDPP16 = getAsmDPP16<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
+ field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0, DstVT>.ret;
field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
let InsDPP = (ins DstRC:$vdst, DstRC:$old, Src0RC32:$src0,
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+ let InsDPP16 = !con(InsDPP, (ins FI:$fi));
let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused,
let Asm32 = getAsm32<1, 1>.ret;
let Asm64 = getAsm64<1, 1, 0, 0, 1>.ret;
let AsmDPP = getAsmDPP<1, 1, 0>.ret;
+ let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret;
let AsmSDWA = getAsmSDWA<1, 1>.ret;
let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//
-class VOP1_DPP<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
- VOP_DPP<ps.OpName, p> {
+class VOP1_DPP<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP16 = 0> :
+ VOP_DPP<ps.OpName, p, isDPP16> {
let hasSideEffects = ps.hasSideEffects;
let Defs = ps.Defs;
let SchedRW = ps.SchedRW;
let Inst{31-25} = 0x3f;
}
+class VOP1_DPP16<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
+ VOP1_DPP<op, ps, p, 1> { // VOP1_DPP instantiated with isDPP16 = 1
+ let AssemblerPredicate = !if(p.HasExt, HasDPP16, DisableInst); // profiles without HasExt get DisableInst instead
+ let SubtargetPredicate = HasDPP16;
+}
+
+class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
+ VOP_DPP8<ps.OpName, p> { // VOP1 DPP8 encoding derived from pseudo `ps`
+ let hasSideEffects = ps.hasSideEffects; // properties below are copied from the pseudo
+ let Defs = ps.Defs;
+ let SchedRW = ps.SchedRW;
+ let Uses = ps.Uses;
+
+ bits<8> vdst;
+ let Inst{8-0} = fi; // low 9 bits carry the fi value
+ let Inst{16-9} = op;
+ let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0); // zero when the profile emits no dst
+ let Inst{31-25} = 0x3f;
+
+ let AssemblerPredicate = !if(p.HasExt, HasDPP8, DisableInst); // profiles without HasExt get DisableInst instead
+ let SubtargetPredicate = HasDPP8;
+}
+
//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//
let DecoderNamespace = "SDWA10";
}
}
+ multiclass VOP1_Real_dpp_gfx10<bits<9> op> { // defines NAME#_dpp_gfx10 from the NAME#_e32 pseudo; only op{7-0} is encoded
+ def _dpp_gfx10 : VOP1_DPP16<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")> {
+ let DecoderNamespace = "SDWA10"; // NOTE(review): DPP16 real placed in the "SDWA10" decoder namespace -- confirm intended
+ }
+ }
+ multiclass VOP1_Real_dpp8_gfx10<bits<9> op> { // defines NAME#_dpp8_gfx10 from the NAME#_e32 pseudo; only op{7-0} is encoded
+ def _dpp8_gfx10 : VOP1_DPP8<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")> {
+ let DecoderNamespace = "DPP8"; // DPP8 reals get their own decoder namespace
+ }
+ }
} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
multiclass VOP1_Real_gfx10_no_dpp<bits<9> op> :
VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>,
VOP1_Real_sdwa_gfx10<op>;
-multiclass VOP1_Real_gfx10<bits<9> op> :
+multiclass VOP1_Real_gfx10_no_dpp8<bits<9> op> :
VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>,
- VOP1_Real_sdwa_gfx10<op>;
+ VOP1_Real_sdwa_gfx10<op>, VOP1_Real_dpp_gfx10<op>;
+
+multiclass VOP1_Real_gfx10<bits<9> op> :
+ VOP1_Real_gfx10_no_dpp8<op>, VOP1_Real_dpp8_gfx10<op>;
defm V_PIPEFLUSH : VOP1_Real_gfx10<0x01b>;
defm V_MOVRELSD_2_B32 : VOP1_Real_gfx10<0x048>;
multiclass VOP1_Real_gfx6_gfx7_gfx10<bits<9> op> :
VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10<op>;
+multiclass VOP1_Real_gfx6_gfx7_gfx10_no_dpp8<bits<9> op> :
+ VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10_no_dpp8<op>;
+
multiclass VOP1_Real_gfx6_gfx7_gfx10_no_dpp<bits<9> op> :
VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10_no_dpp<op>;
defm V_FREXP_MANT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x040>;
defm V_CLREXCP : VOP1_Real_gfx6_gfx7_gfx10<0x041>;
defm V_MOVRELD_B32 : VOP1_Real_gfx6_gfx7_gfx10_no_dpp<0x042>;
-defm V_MOVRELS_B32 : VOP1_Real_gfx6_gfx7_gfx10_no_dpp<0x043>;
-defm V_MOVRELSD_B32 : VOP1_Real_gfx6_gfx7_gfx10_no_dpp<0x044>;
+defm V_MOVRELS_B32 : VOP1_Real_gfx6_gfx7_gfx10_no_dpp8<0x043>;
+defm V_MOVRELSD_B32 : VOP1_Real_gfx6_gfx7_gfx10_no_dpp8<0x044>;
//===----------------------------------------------------------------------===//
// GFX8, GFX9 (VI).
}
defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
+
+//===----------------------------------------------------------------------===//
+// GFX10
+//===----------------------------------------------------------------------===//
+
+let OtherPredicates = [isGFX10Plus] in { // selection patterns mapping the dpp intrinsics onto the GFX10 V_MOV_B32 DPP reals
+def : GCNPat < // mov.dpp8: $src is passed for both leading register operands; fi is DPP8Mode.FI_0
+ (i32 (int_amdgcn_mov_dpp8 i32:$src, imm:$dpp8)),
+ (V_MOV_B32_dpp8_gfx10 $src, $src, (as_i32imm $dpp8), (i32 DPP8Mode.FI_0))
+>;
+
+def : GCNPat < // mov.dpp: $src is passed for both leading register operands; trailing fi operand is 0
+ (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
+ imm:$bound_ctrl)),
+ (V_MOV_B32_dpp_gfx10 $src, $src, (as_i32imm $dpp_ctrl),
+ (as_i32imm $row_mask), (as_i32imm $bank_mask),
+ (as_i1imm $bound_ctrl), (i32 0))
+>;
+
+def : GCNPat < // update.dpp: like mov.dpp but with a separate $old value as the first operand
+ (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask,
+ imm:$bank_mask, imm:$bound_ctrl)),
+ (V_MOV_B32_dpp_gfx10 $old, $src, (as_i32imm $dpp_ctrl),
+ (as_i32imm $row_mask), (as_i32imm $bank_mask),
+ (as_i1imm $bound_ctrl), (i32 0))
+>;
+} // End OtherPredicates = [isGFX10Plus]
VGPR_32:$src2, // stub argument
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+ let InsDPP16 = !con(InsDPP, (ins FI:$fi));
+
+ let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
+ Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
+ VGPR_32:$src2, // stub argument
+ dpp8:$dpp8, FI:$fi);
let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
let Asm32 = getAsm32<1, 2, vt0>.ret;
let Asm64 = getAsm64<1, 2, 0, HasModifiers, HasOMod, vt0>.ret;
let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt0>.ret;
+ let AsmDPP16 = getAsmDPP16<1, 2, HasModifiers, vt0>.ret;
+ let AsmDPP8 = getAsmDPP8<1, 2, 0, vt0>.ret;
let AsmSDWA = getAsmSDWA<1, 2, vt0>.ret;
let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt0>.ret;
let HasSrc2 = 0;
let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
+ let AsmDPP8 = "$vdst, vcc, $src0, $src1 $dpp8$fi";
+ let AsmDPP16 = AsmDPP#"$fi";
let Outs32 = (outs DstRC:$vdst);
let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
}
let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
let AsmDPP = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
+ let AsmDPP8 = "$vdst, vcc, $src0, $src1, vcc $dpp8$fi";
+ let AsmDPP16 = AsmDPP#"$fi";
let Outs32 = (outs DstRC:$vdst);
let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
Src1DPP:$src1,
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+ let InsDPP16 = !con(InsDPP, (ins FI:$fi));
+
let HasExt = 1;
let HasExtDPP = 1;
let HasExtSDWA = 1;
let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
let AsmDPP = "$vdst, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
+ let AsmDPP8 = "$vdst, $src0, $src1, vcc $dpp8$fi";
+ let AsmDPP16 = AsmDPP#"$fi";
let Outs32 = (outs DstRC:$vdst);
let Outs64 = (outs DstRC:$vdst);
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+ let InsDPP16 = !con(InsDPP, (ins FI:$fi));
+
let HasExt = 1;
let HasExtDPP = 1;
let HasExtSDWA = 1;
//===----------------------------------------------------------------------===//
class VOP2_DPP<bits<6> op, VOP2_Pseudo ps,
- string opName = ps.OpName, VOPProfile p = ps.Pfl> :
- VOP_DPP<opName, p> {
+ string opName = ps.OpName, VOPProfile p = ps.Pfl,
+ bit IsDPP16 = 0> :
+ VOP_DPP<opName, p, IsDPP16> {
let hasSideEffects = ps.hasSideEffects;
let Defs = ps.Defs;
let SchedRW = ps.SchedRW;
let Inst{31} = 0x0;
}
+class VOP2_DPP16<bits<6> op, VOP2_Pseudo ps,
+ string opName = ps.OpName, VOPProfile p = ps.Pfl> :
+ VOP2_DPP<op, ps, opName, p, 1> { // VOP2_DPP instantiated with IsDPP16 = 1
+ let AssemblerPredicate = !if(p.HasExt, HasDPP16, DisableInst); // profiles without HasExt get DisableInst instead
+ let SubtargetPredicate = HasDPP16;
+}
+
+class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
+ string opName = ps.OpName, VOPProfile p = ps.Pfl> :
+ VOP_DPP8<ps.OpName, p> { // NOTE(review): opName is accepted but the base gets ps.OpName, unlike VOP2_DPP which forwards opName -- confirm intended
+ let hasSideEffects = ps.hasSideEffects; // properties below are copied from the pseudo
+ let Defs = ps.Defs;
+ let SchedRW = ps.SchedRW;
+ let Uses = ps.Uses;
+
+ bits<8> vdst;
+ bits<8> src1;
+
+ let Inst{8-0} = fi; // low 9 bits carry the fi value
+ let Inst{16-9} = !if(p.HasSrc1, src1{7-0}, 0); // zero when the profile has no src1
+ let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0); // zero when the profile emits no dst
+ let Inst{30-25} = op;
+ let Inst{31} = 0x0;
+
+ let AssemblerPredicate = !if(p.HasExt, HasDPP8, DisableInst); // profiles without HasExt get DisableInst instead
+ let SubtargetPredicate = HasDPP8;
+}
+
//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//
let DecoderNamespace = "SDWA10";
}
}
+ multiclass VOP2_Real_dpp_gfx10<bits<6> op> { // defines NAME#_dpp_gfx10 from the NAME#_e32 pseudo
+ def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
+ let DecoderNamespace = "SDWA10"; // NOTE(review): DPP16 real placed in the "SDWA10" decoder namespace -- confirm intended
+ }
+ }
+ multiclass VOP2_Real_dpp8_gfx10<bits<6> op> { // defines NAME#_dpp8_gfx10 from the NAME#_e32 pseudo
+ def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
+ let DecoderNamespace = "DPP8"; // DPP8 reals get their own decoder namespace
+ }
+ }
//===------------------------- VOP2 (with name) -------------------------===//
multiclass VOP2_Real_e32_gfx10_with_name<bits<6> op, string opName,
let AsmString = asmName # ps.AsmOperands;
}
}
+ multiclass VOP2_Real_dpp_gfx10_with_name<bits<6> op, string opName,
+ string asmName> { // DPP16 real for pseudo opName#"_e32", printed under the mnemonic asmName
+ def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
+ VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
+ let AsmString = asmName # ps.Pfl.AsmDPP16; // mnemonic from asmName, operands from the profile's DPP16 string
+ }
+ }
+ multiclass VOP2_Real_dpp8_gfx10_with_name<bits<6> op, string opName,
+ string asmName> { // DPP8 real for pseudo opName#"_e32", printed under the mnemonic asmName
+ def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
+ VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
+ let AsmString = asmName # ps.Pfl.AsmDPP8; // mnemonic from asmName, operands from the profile's DPP8 string
+ let DecoderNamespace = "DPP8"; // overrides the namespace of the enclosing let block
+ }
+ }
} // End DecoderNamespace = "SDWA10"
//===------------------------------ VOP2be ------------------------------===//
let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
let DecoderNamespace = "SDWA10";
}
// DPP16 real instruction for this opcode. The ", vcc" substring is removed
// from the profile's AsmDPP16 operand string before it is appended to
// asmName. Decoded from the "SDWA10" namespace.
+ def _dpp_gfx10 :
+ VOP2_DPP16<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
+ string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
+ let AsmString = asmName # !subst(", vcc", "", AsmDPP);
+ let DecoderNamespace = "SDWA10";
+ }
// DPP8 real instruction for this opcode. The ", vcc" substring is removed
// from the profile's AsmDPP8 operand string before it is appended to
// asmName. Decoded from the "DPP8" namespace.
+ def _dpp8_gfx10 :
+ VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
+ string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
+ let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
+ let DecoderNamespace = "DPP8";
+ }
def _sdwa_w64_gfx10 :
Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
let isAsmParserOnly = 1;
let DecoderNamespace = "SDWA10";
}
// w64 DPP16 variant: the profile's AsmDPP16 operand string is appended to
// asmName unchanged (no ", vcc" removal), and the record is marked
// isAsmParserOnly.
+ def _dpp_w64_gfx10 :
+ VOP2_DPP16<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
+ string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
+ let AsmString = asmName # AsmDPP;
+ let isAsmParserOnly = 1;
+ }
// w64 DPP8 variant: the profile's AsmDPP8 operand string is appended to
// asmName unchanged (no ", vcc" removal), and the record is marked
// isAsmParserOnly.
+ def _dpp8_w64_gfx10 :
+ VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
+ string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
+ let AsmString = asmName # AsmDPP8;
+ let isAsmParserOnly = 1;
+ }
}
//===----------------------------- VOP3Only -----------------------------===//
// Aggregates the GFX10 real encodings of one VOP2 opcode: e32, e64, sdwa
// and, on the added ('+') side of this change, the dpp and dpp8 forms too.
multiclass VOP2_Real_gfx10<bits<6> op> :
VOP2_Real_e32_gfx10<op>, VOP2_Real_e64_gfx10<op>,
- VOP2_Real_sdwa_gfx10<op>;
+ VOP2_Real_sdwa_gfx10<op>, VOP2_Real_dpp_gfx10<op>, VOP2_Real_dpp8_gfx10<op>;
// Same aggregation as the *_with_name sub-multiclasses it inherits from:
// e32, e64, sdwa and, on the added ('+') side of this change, dpp and dpp8.
// `opName` and `asmName` are forwarded to each of them unchanged.
multiclass VOP2_Real_gfx10_with_name<bits<6> op, string opName,
string asmName> :
VOP2_Real_e32_gfx10_with_name<op, opName, asmName>,
VOP2_Real_e64_gfx10_with_name<op, opName, asmName>,
- VOP2_Real_sdwa_gfx10_with_name<op, opName, asmName>;
+ VOP2_Real_sdwa_gfx10_with_name<op, opName, asmName>,
+ VOP2_Real_dpp_gfx10_with_name<op, opName, asmName>,
+ VOP2_Real_dpp8_gfx10_with_name<op, opName, asmName>;
defm V_CNDMASK_B32 : Base_VOP2_Real_gfx10<0x001>;
defm V_XNOR_B32 : VOP2_Real_gfx10<0x01e>;
// Base_VOP_SDWA10_Real for pseudo `ps`, additionally registered through
// SIMCInstr under ps.PseudoInstr with the SDWA10 encoding family.
class VOP_SDWA10_Real<VOP_SDWA_Pseudo ps> :
Base_VOP_SDWA10_Real<ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SDWA10>;
-class VOP_DPPe<VOPProfile P> : Enc64 {
+class VOP_DPPe<VOPProfile P, bit IsDPP16=0> : Enc64 {
bits<2> src0_modifiers;
bits<8> src0;
bits<2> src1_modifiers;
let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0);
let Inst{48-40} = dpp_ctrl;
+ let Inst{50} = !if(IsDPP16, fi, ?);
let Inst{51} = bound_ctrl;
let Inst{52} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); // src0_neg
let Inst{53} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // src0_abs
let TSFlags = ps.TSFlags;
}
-class VOP_DPP <string OpName, VOPProfile P,
- dag InsDPP = P.InsDPP,
- string AsmDPP = P.AsmDPP> :
+class VOP_DPP <string OpName, VOPProfile P, bit IsDPP16,
+ dag InsDPP = !if(IsDPP16, P.InsDPP16, P.InsDPP),
+ string AsmDPP = !if(IsDPP16, P.AsmDPP16, P.AsmDPP)> :
InstSI <P.OutsDPP, InsDPP, OpName#AsmDPP, []>,
- VOP_DPPe<P> {
+ VOP_DPPe<P, IsDPP16> {
let mayLoad = 0;
let mayStore = 0;
let DecoderNamespace = "DPP";
}
// Encoding fields shared by DPP8 instructions (bits 63-32 of an Enc64 word).
//   src0 - 8 bits in Inst{39-32}; encoded as 0 when the profile has no src0.
//   dpp8 - 24-bit value in Inst{63-40}.
//   fi   - 9-bit field declared here but not placed by this class; derived
//          classes assign it to instruction bits (VOP2_DPP8 uses Inst{8-0}).
+class VOP_DPP8e<VOPProfile P> : Enc64 {
+ bits<8> src0;
+ bits<24> dpp8;
+ bits<9> fi;
+
+ let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0);
+ let Inst{63-40} = dpp8{23-0};
+}
+
// Common base for DPP8 instructions. Operands and the asm operand string come
// from the profile (OutsDPP8 / InsDPP8 / AsmDPP8); the encoding fields come
// from VOP_DPP8e.
+class VOP_DPP8<string OpName, VOPProfile P> :
+ InstSI<P.OutsDPP8, P.InsDPP8, OpName#P.AsmDPP8, []>,
+ VOP_DPP8e<P> {
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+
// Flagged as a VALU and DPP instruction; Size is 8 (bytes, for the Enc64 word).
+ let VALU = 1;
+ let DPP = 1;
+ let Size = 8;
+
// Parsed operands are converted by the "cvtDPP8" asm-match converter.
+ let AsmMatchConverter = "cvtDPP8";
+ let SubtargetPredicate = HasDPP8;
// When P.HasExt is unset, both the assembler predicate and the asm variant
// select their "disabled" value.
+ let AssemblerPredicate = !if(P.HasExt, HasDPP8, DisableInst);
+ let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.DPP,
+ AMDGPUAsmVariants.Disable);
// With at least one source argument, P.TieRegDPP is tied to $vdst and is
// listed in DisableEncoding; otherwise both strings are empty.
+ let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $vdst", "");
+ let DisableEncoding = !if(P.NumSrcArgs, P.TieRegDPP, "");
+}
+
// Named constants for the two DPP8 modes.
// NOTE(review): presumably these are the values written to the `fi` encoding
// field to select DPP8 with FI=0 / FI=1 -- confirm against the users of
// DPP8Mode, which are not visible here.
+def DPP8Mode {
+ int FI_0 = 0xE9;
+ int FI_1 = 0xEA;
+}
+
class getNumNodeArgs<SDPatternOperator Op> {
SDNode N = !cast<SDNode>(Op);
SDTypeProfile TP = N.TypeProfile;
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI -check-prefix=VI-OPT %s
-; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOOPT %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=VI,VI-OPT,PREGFX10,PREGFX10-OPT %s
+; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=VI,VI-NOOPT,PREGFX10,PREGFX10-NOOPT %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=VI,VI-OPT %s
; FIXME: The register allocator / scheduler should be able to avoid these hazards.
; VI-LABEL: {{^}}dpp_test:
; VI: v_mov_b32_e32 v0, s{{[0-9]+}}
; VI-NOOPT: v_mov_b32_e32 v1, s{{[0-9]+}}
-; VI-OPT: s_nop 1
-; VI-NOOPT: s_nop 0
-; VI-NOOPT: s_nop 0
+; PREGFX10-OPT: s_nop 1
+; PREGFX10-NOOPT: s_nop 0
+; PREGFX10-NOOPT: s_nop 0
; VI-OPT: v_mov_b32_dpp v0, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0x11]
; VI-NOOPT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x01,0x08,0x11]
define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) {
; VI-LABEL: {{^}}dpp_wait_states:
; VI-NOOPT: v_mov_b32_e32 [[VGPR1:v[0-9]+]], s{{[0-9]+}}
; VI: v_mov_b32_e32 [[VGPR0:v[0-9]+]], s{{[0-9]+}}
-; VI-OPT: s_nop 1
-; VI-NOOPT: s_nop 0
-; VI-NOOPT: s_nop 0
+; PREGFX10-OPT: s_nop 1
+; PREGFX10-NOOPT: s_nop 0
+; PREGFX10-NOOPT: s_nop 0
; VI-OPT: v_mov_b32_dpp [[VGPR0]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
; VI-NOOPT: v_mov_b32_dpp [[VGPR1]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
-; VI-OPT: s_nop 1
-; VI-NOOPT: s_nop 0
-; VI-NOOPT: s_nop 0
+; PREGFX10-OPT: s_nop 1
+; PREGFX10-NOOPT: s_nop 0
+; PREGFX10-NOOPT: s_nop 0
; VI-OPT: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
; VI-NOOPT: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
define amdgpu_kernel void @dpp_wait_states(i32 addrspace(1)* %out, i32 %in) {
; VI-LABEL: {{^}}dpp_first_in_bb:
; VI: ; %endif
-; VI-OPT: s_mov_b32
-; VI-OPT: s_mov_b32
-; VI-NOOPT: s_waitcnt
-; VI-NOOPT-NEXT: s_nop 0
+; PREGFX10-OPT: s_mov_b32
+; PREGFX10-OPT: s_mov_b32
+; PREGFX10-NOOPT: s_waitcnt
+; PREGFX10-NOOPT-NEXT: s_nop 0
; VI: v_mov_b32_dpp [[VGPR0:v[0-9]+]], v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
-; VI-OPT: s_nop 1
+; PREGFX10-OPT: s_nop 1
; VI: v_mov_b32_dpp [[VGPR1:v[0-9]+]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
-; VI-OPT: s_nop 1
-; VI-NOOPT: s_nop 0
-; VI-NOOPT: s_nop 0
+; PREGFX10-OPT: s_nop 1
+; PREGFX10-NOOPT: s_nop 0
+; PREGFX10-NOOPT: s_nop 0
; VI: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
define amdgpu_kernel void @dpp_first_in_bb(float addrspace(1)* %out, float addrspace(1)* %in, float %cond, float %a, float %b) {
%cmp = fcmp oeq float %cond, 0.0
--- /dev/null
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
+
+; GFX10-LABEL: {{^}}dpp8_test:
+; GFX10: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
+; GFX10: v_mov_b32_dpp [[SRC]], [[SRC]] dpp8:[1,0,0,0,0,0,0,0]{{$}}
; Applies one llvm.amdgcn.mov.dpp8 call with selector constant 1 to %in and
; stores the result to %out. The checks above expect this to print as a single
; v_mov_b32_dpp with dpp8 selector list [1,0,0,0,0,0,0,0].
+define amdgpu_kernel void @dpp8_test(i32 addrspace(1)* %out, i32 %in) {
+ %tmp0 = call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %in, i32 1) #0
+ store i32 %tmp0, i32 addrspace(1)* %out
+ ret void
+}
+
+; GFX10-LABEL: {{^}}dpp8_wait_states:
+; GFX10-NOOPT: v_mov_b32_e32 [[VGPR1:v[0-9]+]], s{{[0-9]+}}
+; GFX10: v_mov_b32_e32 [[VGPR0:v[0-9]+]], s{{[0-9]+}}
+; GFX10: v_mov_b32_dpp [[VGPR0]], [[VGPR0]] dpp8:[1,0,0,0,0,0,0,0]{{$}}
+; GFX10: v_mov_b32_dpp [[VGPR0]], [[VGPR0]] dpp8:[5,0,0,0,0,0,0,0]{{$}}
; Chains two llvm.amdgcn.mov.dpp8 calls (selector constants 1 and 5), the
; second consuming the result of the first, then stores the final value. The
; checks above expect two back-to-back v_mov_b32_dpp instructions on the same
; register.
+define amdgpu_kernel void @dpp8_wait_states(i32 addrspace(1)* %out, i32 %in) {
+ %tmp0 = call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %in, i32 1) #0
+ %tmp1 = call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %tmp0, i32 5) #0
+ store i32 %tmp1, i32 addrspace(1)* %out
+ ret void
+}
+
+declare i32 @llvm.amdgcn.mov.dpp8.i32(i32, i32) #0
+
+attributes #0 = { nounwind readnone convergent }
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-dpp-combine=false -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI -check-prefix=VI-OPT %s
-; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-dpp-combine=false -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOOPT %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10 %s
-; VI-LABEL: {{^}}dpp_test:
-; VI: v_mov_b32_e32 v0, s{{[0-9]+}}
-; VI: v_mov_b32_e32 v1, s{{[0-9]+}}
-; VI-OPT: s_nop 1
-; VI-NOOPT: s_nop 0
-; VI-NOOPT: s_nop 0
-; VI: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x01,0x08,0x11]
+; GCN-LABEL: {{^}}dpp_test:
+; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
+; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
+; GFX8: s_nop 1
+; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
- %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 1) #0
+ %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 0) #0
store i32 %tmp0, i32 addrspace(1)* %out
ret void
}
+; GCN-LABEL: {{^}}dpp_test_bc:
+; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
+; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
+; GFX8: s_nop 1
+; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[2,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0{{$}}
; Variant of dpp_test that passes i1 1 as the last llvm.amdgcn.update.dpp
; argument; the checks above expect the printed instruction to carry a
; bound_ctrl modifier in that case, with the remaining arguments (2, 1, 1)
; showing up as quad_perm, row_mask and bank_mask.
+define amdgpu_kernel void @dpp_test_bc(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
+ %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 2, i32 1, i32 1, i1 1) #0
+ store i32 %tmp0, i32 addrspace(1)* %out
+ ret void
+}
+
+
; GCN-LABEL: {{^}}dpp_test1:
-; VI-OPT: v_add_u32_e32 [[REG:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}
-; VI-NOOPT: v_add_u32_e64 [[REG:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{[0-9]+}}
-; VI-NOOPT: v_mov_b32_e32 v{{[0-9]+}}, 0
-; VI-NEXT: s_nop 0
-; VI-NEXT: s_nop 0
-; VI-NEXT: v_mov_b32_dpp {{v[0-9]+}}, [[REG]] quad_perm:[1,0,3,2] row_mask:0xf bank_mask:0xf
+; GFX10: v_add_nc_u32_e32 [[REG:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GFX8-OPT: v_add_u32_e32 [[REG:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; GFX8-NOOPT: v_add_u32_e64 [[REG:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{[0-9]+}}
+; GFX8-NOOPT: v_mov_b32_e32 v{{[0-9]+}}, 0
+; GFX8: s_nop 0
+; GFX8-NEXT: s_nop 0
+; GFX8-OPT-NEXT: v_mov_b32_dpp {{v[0-9]+}}, [[REG]] quad_perm:[1,0,3,2] row_mask:0xf bank_mask:0xf
@0 = internal unnamed_addr addrspace(3) global [448 x i32] undef, align 4
define weak_odr amdgpu_kernel void @dpp_test1(i32* %arg) local_unnamed_addr {
bb:
--- /dev/null
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding < %s 2>&1 | FileCheck -check-prefix=GFX89 %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding < %s 2>&1 | FileCheck -check-prefix=GFX89 %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding < %s 2>&1 | FileCheck -check-prefix=GFX10 %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding < %s 2>&1 | FileCheck -check-prefix=GFX89-ERR %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding < %s 2>&1 | FileCheck -check-prefix=GFX89-ERR %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding < %s 2>&1 | FileCheck -check-prefix=GFX10-ERR %s
+
+v_mov_b32_dpp v0, v1 row_share:1 row_mask:0x1 bank_mask:0x1
+// GFX89-ERR: not a valid operand.
+// GFX10: v_mov_b32_dpp v0, v1 row_share:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x51,0x01,0x11]
+
+v_mov_b32_dpp v0, v1 row_xmask:1 row_mask:0x1 bank_mask:0x1
+// GFX89-ERR: not a valid operand.
+// GFX10: v_mov_b32_dpp v0, v1 row_xmask:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x61,0x01,0x11]
+
+v_mov_b32_dpp v0, v1 wave_shl:1 row_mask:0x1 bank_mask:0x1
+// GFX89: v0, v1 wave_shl:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x30,0x01,0x11]
+// GFX10-ERR: not a valid operand.
+
+v_mov_b32_dpp v0, v1 wave_shr:1 row_mask:0x1 bank_mask:0x1
+// GFX89: v0, v1 wave_shr:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x38,0x01,0x11]
+// GFX10-ERR: not a valid operand.
+
+v_mov_b32_dpp v0, v1 wave_rol:1 row_mask:0x1 bank_mask:0x1
+// GFX89: v0, v1 wave_rol:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x34,0x01,0x11]
+// GFX10-ERR: not a valid operand.
+
+v_mov_b32_dpp v0, v1 wave_ror:1 row_mask:0x1 bank_mask:0x1
+// GFX89: v0, v1 wave_ror:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x3c,0x01,0x11]
+// GFX10-ERR: not a valid operand.
+
+v_mov_b32_dpp v0, v1 row_bcast:15 row_mask:0x1 bank_mask:0x1
+// GFX89: v0, v1 row_bcast:15 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x42,0x01,0x11]
+// GFX10-ERR: not a valid operand.
+
+v_mov_b32_dpp v0, v1 row_bcast:31 row_mask:0x1 bank_mask:0x1
+// GFX89: v0, v1 row_bcast:31 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x43,0x01,0x11]
+// GFX10-ERR: not a valid operand.