/// Measure the specified inline asm to determine an approximation of its
/// length.
- virtual unsigned getInlineAsmLength(const char *Str,
- const MCAsmInfo &MAI) const;
+ virtual unsigned getInlineAsmLength(
+ const char *Str, const MCAsmInfo &MAI,
+ const TargetSubtargetInfo *STI = nullptr) const;
/// Allocate and return a hazard recognizer to use for this target when
/// scheduling the machine instructions before register allocation.
class MCExpr;
class MCSection;
class MCStreamer;
+class MCSubtargetInfo;
class MCSymbol;
namespace WinEH {
bool hasMachoTBSSDirective() const { return HasMachoTBSSDirective; }
bool hasCOFFAssociativeComdats() const { return HasCOFFAssociativeComdats; }
bool hasCOFFComdatConstants() const { return HasCOFFComdatConstants; }
- unsigned getMaxInstLength() const { return MaxInstLength; }
+
+ /// Returns the maximum possible encoded instruction size in bytes. If \p STI
+ /// is null, this should be the maximum size for any subtarget.
+ virtual unsigned getMaxInstLength(const MCSubtargetInfo *STI = nullptr) const {
+ return MaxInstLength; // Base implementation ignores \p STI.
+ }
+
unsigned getMinInstAlignment() const { return MinInstAlignment; }
bool getDollarIsPC() const { return DollarIsPC; }
const char *getSeparatorString() const { return SeparatorString; }
/// simple--i.e. not a logical or arithmetic expression--size values without
/// the optional fill value. This is primarily used for creating arbitrary
/// sized inline asm blocks for testing purposes.
-unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
- const MCAsmInfo &MAI) const {
+unsigned TargetInstrInfo::getInlineAsmLength(
+ const char *Str,
+ const MCAsmInfo &MAI, const TargetSubtargetInfo *STI) const {
// Count the number of instructions in the asm.
bool AtInsnStart = true;
unsigned Length = 0;
+ const unsigned MaxInstLength = MAI.getMaxInstLength(STI);
for (; *Str; ++Str) {
if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
strlen(MAI.getSeparatorString())) == 0) {
}
if (AtInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
- unsigned AddLength = MAI.getMaxInstLength();
+ unsigned AddLength = MaxInstLength;
if (strncmp(Str, ".space", 6) == 0) {
char *EStr;
int SpaceSize;
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
+AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
+ MCContext &Ctx,
+ MCInstrInfo const *MCII) :
+ MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
+ TargetMaxInstBytes(Ctx.getAsmInfo()->getMaxInstLength(&STI)) {} // Cached once for STI.
+
inline static MCDisassembler::DecodeStatus
addOperand(MCInst &Inst, const MCOperand& Opnd) {
Inst.addOperand(Opnd);
if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding] && !isGFX10())
report_fatal_error("Disassembly not yet supported for subtarget");
- unsigned MaxInstBytesNum = (std::min)(
- STI.getFeatureBits()[AMDGPU::FeatureGFX10] ? (size_t) 20 :
- STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal] ? (size_t) 12 : (size_t)8,
- Bytes_.size());
+ unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
Bytes = Bytes_.slice(0, MaxInstBytesNum);
DecodeStatus Res = MCDisassembler::Fail;
private:
std::unique_ptr<MCInstrInfo const> const MCII;
const MCRegisterInfo &MRI;
+ const unsigned TargetMaxInstBytes;
mutable ArrayRef<uint8_t> Bytes;
mutable uint32_t Literal;
mutable bool HasLiteral;
public:
AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
- MCInstrInfo const *MCII) :
- MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()) {}
-
+ MCInstrInfo const *MCII);
~AMDGPUDisassembler() override = default;
DecodeStatus getInstruction(MCInst &MI, uint64_t &Size,
#include "AMDGPUMCAsmInfo.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
using namespace llvm;
HasSingleParameterDotFile = false;
//===------------------------------------------------------------------===//
MinInstAlignment = 4;
- MaxInstLength = (TT.getArch() == Triple::amdgcn) ? 8 : 16;
+
+ // This is the maximum encoded instruction size for gfx10. With a known
+ // subtarget, it can be reduced to as little as 8 bytes.
+ MaxInstLength = (TT.getArch() == Triple::amdgcn) ? 20 : 16;
SeparatorString = "\n";
CommentString = ";";
PrivateLabelPrefix = "";
SectionName == ".hsarodata_readonly_agent" ||
MCAsmInfo::shouldOmitSectionDirective(SectionName);
}
+
+unsigned AMDGPUMCAsmInfo::getMaxInstLength(const MCSubtargetInfo *STI) const {
+ if (!STI || STI->getTargetTriple().getArch() == Triple::r600)
+ return MaxInstLength; // No subtarget, or r600: use the per-triple maximum.
+
+ // Maximum size, in bytes, for NSA encoded images.
+ if (STI->getFeatureBits()[AMDGPU::FeatureNSAEncoding])
+ return 20;
+
+ // 64-bit instruction with 32-bit literal: 12 bytes in total.
+ if (STI->getFeatureBits()[AMDGPU::FeatureVOP3Literal])
+ return 12;
+
+ return 8; // Neither feature is present.
+}
public:
explicit AMDGPUMCAsmInfo(const Triple &TT);
bool shouldOmitSectionDirective(StringRef SectionName) const override;
+ unsigned getMaxInstLength(const MCSubtargetInfo *STI) const override;
};
} // namespace llvm
#endif
case TargetOpcode::INLINEASM_BR: {
const MachineFunction *MF = MI.getParent()->getParent();
const char *AsmStr = MI.getOperand(0).getSymbolName();
- return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
+ return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo(),
+ &MF->getSubtarget());
}
default:
return DescSize;
/// Hexagon counts the number of ##'s and adjusts for that many
/// constant extenders.
unsigned HexagonInstrInfo::getInlineAsmLength(const char *Str,
- const MCAsmInfo &MAI) const {
+ const MCAsmInfo &MAI,
+ const TargetSubtargetInfo *STI) const {
StringRef AStr(Str);
// Count the number of instructions in the asm.
bool atInsnStart = true;
unsigned Length = 0;
+ const unsigned MaxInstLength = MAI.getMaxInstLength(STI);
for (; *Str; ++Str) {
if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
strlen(MAI.getSeparatorString())) == 0)
atInsnStart = true;
if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
- Length += MAI.getMaxInstLength();
+ Length += MaxInstLength;
atInsnStart = false;
}
if (atInsnStart && strncmp(Str, MAI.getCommentString().data(),
/// Measure the specified inline asm to determine an approximation of its
/// length.
- unsigned getInlineAsmLength(const char *Str,
- const MCAsmInfo &MAI) const override;
+ unsigned getInlineAsmLength(
+ const char *Str,
+ const MCAsmInfo &MAI,
+ const TargetSubtargetInfo *STI = nullptr) const override;
/// Allocate and return a hazard recognizer to use for this target when
/// scheduling the machine instructions after register allocation.
--- /dev/null
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
+
+; Make sure the code size estimate for inline asm is 20-bytes per
+; instruction on gfx10, rather than 8 in previous generations.
+
+; GCN-LABEL: {{^}}long_forward_branch_gfx10only:
+; GFX9: s_cmp_eq_u32
+; GFX9-NEXT: s_cbranch_scc1
+
+; GFX10: s_cmp_eq_u32
+; GFX10-NEXT: s_cbranch_scc0
+; GFX10: s_getpc_b64
+; GFX10: s_add_u32
+; GFX10: s_addc_u32
+; GFX10: s_setpc_b64
+define amdgpu_kernel void @long_forward_branch_gfx10only(i32 addrspace(1)* %arg, i32 %cnd) #0 {
+bb0:
+ %cmp = icmp eq i32 %cnd, 0
+ br i1 %cmp, label %bb3, label %bb2 ; +9 dword branch
+
+bb2:
+ ; The two inline asm instructions below are estimated as 40-bytes on gfx10
+ ; (requiring a long branch), but 16-bytes on gfx9 (allowing a short branch)
+ call void asm sideeffect
+ "v_nop_e64
+ v_nop_e64", ""() #0
+ br label %bb3
+
+bb3:
+ store volatile i32 %cnd, i32 addrspace(1)* %arg
+ ret void
+}