"GFX10 bug, inst_offset ignored in flat segment"
>;
+// Subtarget feature flagging the hardware bug that affects branches whose
+// offset is 0x3f. Setting it stores "true" into the HasOffset3fBug subtarget
+// field; the MC layer checks this feature before relaxing SOPP branches.
+def FeatureOffset3fBug : SubtargetFeature<"offset-3f-bug",
+ "HasOffset3fBug",
+ "true",
+ "Branch offset of 3f hardware bug"
+>;
+
class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
"ldsbankcount"#Value,
"LDSBankCount",
FeatureVcmpxExecWARHazard,
FeatureLdsBranchVmemWARHazard,
FeatureNSAtoVMEMBug,
+ FeatureOffset3fBug,
FeatureFlatSegmentOffsetBug
];
}
def HasDot6Insts : Predicate<"Subtarget->hasDot6Insts()">,
AssemblerPredicate<"FeatureDot6Insts">;
+// Predicate that holds on subtargets with the branch-offset-0x3f hardware bug.
+// The codegen condition and the assembler predicate both test for the
+// presence of FeatureOffset3fBug, so the two halves agree with each other and
+// with the predicate's name.
+def HasOffset3fBug : Predicate<"Subtarget->hasOffset3fBug()">,
+  AssemblerPredicate<"FeatureOffset3fBug">;
+
def EnableLateCFGStructurize : Predicate<
"EnableLateStructurizeCFG">;
HasVcmpxExecWARHazard(false),
HasLdsBranchVmemWARHazard(false),
HasNSAtoVMEMBug(false),
+ HasOffset3fBug(false),
HasFlatSegmentOffsetBug(false),
FeatureDisable(false),
bool HasVcmpxExecWARHazard;
bool HasLdsBranchVmemWARHazard;
bool HasNSAtoVMEMBug;
+ bool HasOffset3fBug;
bool HasFlatSegmentOffsetBug;
// Dummy feature to use for assembler in tablegen.
return HasR128A16;
}
+ /// \returns the value of the HasOffset3fBug subtarget flag.
+ bool hasOffset3fBug() const {
+ return HasOffset3fBug;
+ }
+
bool hasNSAEncoding() const {
return HasNSAEncoding;
}
const MCSubtargetInfo *STI) const override;
+ // Relaxation hooks: the three overrides below are only declared here and
+ // are defined out of line after the class.
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout) const override {
- return false;
- }
+ const MCAsmLayout &Layout) const override;
+
void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override {
- llvm_unreachable("Not implemented");
- }
+ MCInst &Res) const override;
+
bool mayNeedRelaxation(const MCInst &Inst,
- const MCSubtargetInfo &STI) const override {
- return false;
- }
+ const MCSubtargetInfo &STI) const override;
unsigned getMinimumNopSize() const override;
bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
} //End anonymous namespace
+// Builds the relaxed form of a SOPP branch in Res: the opcode is swapped for
+// the one returned by the getSOPPWithRelaxation mapping (the variant padded
+// with an s_nop) and the first operand of Inst is carried over unchanged.
+void AMDGPUAsmBackend::relaxInstruction(const MCInst &Inst,
+                                        const MCSubtargetInfo &STI,
+                                        MCInst &Res) const {
+  // Look up the padded twin of the incoming opcode.
+  unsigned PaddedOpcode = AMDGPU::getSOPPWithRelaxation(Inst.getOpcode());
+  Res.setOpcode(PaddedOpcode);
+  // Operand 0 is the only operand copied into the relaxed instruction.
+  Res.addOperand(Inst.getOperand(0));
+}
+
+// Reports whether a resolved branch fixup has to be relaxed.
+//
+// A branch whose target offset works out to 0x3f trips a gfx1010 hardware
+// bug. Relaxing such a branch places an s_nop 0 directly behind it, which
+// effectively increments the offset and so avoids the problematic value.
+bool AMDGPUAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+                                            uint64_t Value,
+                                            const MCRelaxableFragment *DF,
+                                            const MCAsmLayout &Layout) const {
+  // Signed quotient of the fixup value by 4, less one, is the quantity that
+  // is compared against the offending offset.
+  const int64_t BranchOffset = int64_t(Value) / 4 - 1;
+  return BranchOffset == 0x3f;
+}
+
+// An instruction is a relaxation candidate only when the subtarget has
+// FeatureOffset3fBug and the opcode has an entry in the
+// getSOPPWithRelaxation mapping (a negative result means no entry).
+bool AMDGPUAsmBackend::mayNeedRelaxation(const MCInst &Inst,
+                                         const MCSubtargetInfo &STI) const {
+  const bool SubtargetHasBug =
+      STI.getFeatureBits()[AMDGPU::FeatureOffset3fBug];
+  // Short-circuit keeps the opcode lookup off the path for unaffected targets.
+  return SubtargetHasBug &&
+         AMDGPU::getSOPPWithRelaxation(Inst.getOpcode()) >= 0;
+}
+
static unsigned getFixupKindNumBytes(unsigned Kind) {
switch (Kind) {
case AMDGPU::fixup_si_sopp_br:
let ValueCols = [["0"]];
}
+// Maps a SOPP to a SOPP with S_NOP.
+// Instructions deriving from Base_SOPP are grouped by their AsmString and
+// distinguished by their Size: the Size 4 instruction is the key and the
+// Size 8 instruction with the same AsmString is the value. Callers treat a
+// negative result from the generated lookup as "no padded variant".
+def getSOPPWithRelaxation : InstrMapping {
+ let FilterClass = "Base_SOPP";
+ let RowFields = ["AsmString"];
+ let ColFields = ["Size"];
+ let KeyCol = ["4"];
+ let ValueCols = [["8"]];
+}
+
include "SIInstructions.td"
include "DSInstructions.td"
// SOPP Instructions
//===----------------------------------------------------------------------===//
+// Marker base class shared by the plain and the s_nop-padded SOPP classes.
+// It is the FilterClass of the getSOPPWithRelaxation instruction mapping,
+// which uses the AsmString stored here as its row key.
+class Base_SOPP <string asm> {
+ string AsmString = asm;
+}
+
class SOPPe <bits<7> op> : Enc32 {
bits <16> simm16;
}
class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern = []> :
- InstSI <(outs), ins, asm, pattern >, SOPPe <op> {
+ InstSI <(outs), ins, asm, pattern >, SOPPe <op>, Base_SOPP <asm> {
let mayLoad = 0;
let mayStore = 0;
let UseNamedOperandTable = 1;
}
-
def S_NOP : SOPP <0x00000000, (ins i16imm:$simm16), "s_nop $simm16">;
+// 64-bit encoding of a SOPP instruction followed by an S_NOP.
+// Bits 31-0 hold the SOPP itself (immediate, opcode, encoding marker); bits
+// 63-32 replicate S_NOP's opcode and encoding fields with a zero immediate.
+class SOPP_w_nop_e <bits<7> op> : Enc64 {
+ bits <16> simm16;
+
+ let Inst{15-0} = simm16;
+ let Inst{22-16} = op;
+ let Inst{31-23} = 0x17f; // encoding
+ let Inst{47-32} = 0x0; // immediate of the trailing S_NOP
+ let Inst{54-48} = S_NOP.Inst{22-16}; // opcode
+ let Inst{63-55} = S_NOP.Inst{31-23}; // encoding
+}
+
+// Instruction class for a SOPP that is emitted together with a trailing
+// S_NOP. It takes the same parameters as SOPP but uses the 64-bit
+// SOPP_w_nop_e encoding and declares Size = 8; deriving from Base_SOPP makes
+// it visible to the getSOPPWithRelaxation mapping.
+class SOPP_w_nop <bits<7> op, dag ins, string asm, list<dag> pattern = []> :
+ InstSI <(outs), ins, asm, pattern >, SOPP_w_nop_e <op>, Base_SOPP <asm> {
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let SALU = 1;
+ let SOPP = 1;
+ let Size = 8;
+ let SchedRW = [WriteSALU];
+
+ let UseNamedOperandTable = 1;
+}
+
+// Defines a SOPP instruction twice from one set of arguments: the regular
+// record under the given name and a "_pad_s_nop" record that uses the
+// SOPP_w_nop class. Both records receive the same asm string and pattern.
+multiclass SOPP_With_Relaxation <bits<7> op, dag ins, string asm, list<dag> pattern = []> {
+ def "" : SOPP <op, ins, asm, pattern>;
+ def _pad_s_nop : SOPP_w_nop <op, ins, asm, pattern>;
+}
+
let isTerminator = 1 in {
def S_ENDPGM : SOPP <0x00000001, (ins EndpgmImm:$simm16), "s_endpgm$simm16"> {
} // End SubtargetPredicate = isGFX10Plus
let isBranch = 1, SchedRW = [WriteBranch] in {
-def S_BRANCH : SOPP <
+let isBarrier = 1 in {
+defm S_BRANCH : SOPP_With_Relaxation <
0x00000002, (ins sopp_brtarget:$simm16), "s_branch $simm16",
- [(br bb:$simm16)]> {
- let isBarrier = 1;
+ [(br bb:$simm16)]>;
}
let Uses = [SCC] in {
-def S_CBRANCH_SCC0 : SOPP <
+defm S_CBRANCH_SCC0 : SOPP_With_Relaxation <
0x00000004, (ins sopp_brtarget:$simm16),
"s_cbranch_scc0 $simm16"
>;
-def S_CBRANCH_SCC1 : SOPP <
+defm S_CBRANCH_SCC1 : SOPP_With_Relaxation <
0x00000005, (ins sopp_brtarget:$simm16),
"s_cbranch_scc1 $simm16"
>;
} // End Uses = [SCC]
let Uses = [VCC] in {
-def S_CBRANCH_VCCZ : SOPP <
+defm S_CBRANCH_VCCZ : SOPP_With_Relaxation <
0x00000006, (ins sopp_brtarget:$simm16),
"s_cbranch_vccz $simm16"
>;
-def S_CBRANCH_VCCNZ : SOPP <
+defm S_CBRANCH_VCCNZ : SOPP_With_Relaxation <
0x00000007, (ins sopp_brtarget:$simm16),
"s_cbranch_vccnz $simm16"
>;
} // End Uses = [VCC]
let Uses = [EXEC] in {
-def S_CBRANCH_EXECZ : SOPP <
+defm S_CBRANCH_EXECZ : SOPP_With_Relaxation <
0x00000008, (ins sopp_brtarget:$simm16),
"s_cbranch_execz $simm16"
>;
-def S_CBRANCH_EXECNZ : SOPP <
+defm S_CBRANCH_EXECNZ : SOPP_With_Relaxation <
0x00000009, (ins sopp_brtarget:$simm16),
"s_cbranch_execnz $simm16"
>;
} // End Uses = [EXEC]
-def S_CBRANCH_CDBGSYS : SOPP <
+defm S_CBRANCH_CDBGSYS : SOPP_With_Relaxation <
0x00000017, (ins sopp_brtarget:$simm16),
"s_cbranch_cdbgsys $simm16"
>;
-def S_CBRANCH_CDBGSYS_AND_USER : SOPP <
+defm S_CBRANCH_CDBGSYS_AND_USER : SOPP_With_Relaxation <
0x0000001A, (ins sopp_brtarget:$simm16),
"s_cbranch_cdbgsys_and_user $simm16"
>;
-def S_CBRANCH_CDBGSYS_OR_USER : SOPP <
+defm S_CBRANCH_CDBGSYS_OR_USER : SOPP_With_Relaxation <
0x00000019, (ins sopp_brtarget:$simm16),
"s_cbranch_cdbgsys_or_user $simm16"
>;
-def S_CBRANCH_CDBGUSER : SOPP <
+defm S_CBRANCH_CDBGUSER : SOPP_With_Relaxation <
0x00000018, (ins sopp_brtarget:$simm16),
"s_cbranch_cdbguser $simm16"
>;
LLVM_READONLY
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
+/// Lookup for the getSOPPWithRelaxation instruction mapping: given the opcode
+/// of a SOPP instruction, yields the opcode of its s_nop-padded counterpart.
+/// Callers treat a negative result as "no counterpart".
+LLVM_READONLY
+int getSOPPWithRelaxation(uint16_t Opcode);
+
struct MIMGBaseOpcodeInfo {
MIMGBaseOpcode BaseOpcode;
bool Store;
--- /dev/null
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck %s --check-prefix=GFX10
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -filetype=obj %s | llvm-objdump -disassemble -mcpu=gfx1010 - | FileCheck %s --check-prefix=BIN
+ s_getpc_b64 s[0:1]
+ s_cbranch_vccnz BB0_1
+// GFX10: s_cbranch_vccnz BB0_1 ; encoding: [A,A,0x87,0xbf]
+// GFX10-NEXT: ; fixup A - offset: 0, value: BB0_1, kind: fixup_si_sopp_br
+// BIN: s_cbranch_vccnz BB0_1 // 000000000004: BF870040
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+BB0_1:
+ s_nop 0
+ s_endpgm
--- /dev/null
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck %s --check-prefix=GFX10
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -filetype=obj %s | llvm-objdump -disassemble -mcpu=gfx1010 - | FileCheck %s --check-prefix=BIN
+// Two back-to-back branches that would both resolve to offset 0x3f when
+// emitted as 4-byte instructions. Both are expected to be relaxed to the
+// 8-byte padded form: the second branch therefore sits at address 0x0C, and
+// the final offsets are 0x41 and 0x40 respectively.
+ s_getpc_b64 s[0:1]
+ s_cbranch_vccnz BB0_1
+// GFX10: s_cbranch_vccnz BB0_1 ; encoding: [A,A,0x87,0xbf]
+// GFX10-NEXT: ; fixup A - offset: 0, value: BB0_1, kind: fixup_si_sopp_br
+// BIN: s_cbranch_vccnz BB0_1 // 000000000004: BF870041
+ s_cbranch_execz BB0_3
+// GFX10: s_cbranch_execz BB0_3 ; encoding: [A,A,0x88,0xbf]
+// GFX10-NEXT: ; fixup A - offset: 0, value: BB0_3, kind: fixup_si_sopp_br
+// BIN: s_cbranch_execz BB0_3 // 00000000000C: BF880040
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+BB0_1:
+ s_nop 0
+BB0_3:
+ s_nop 0
+ s_endpgm
--- /dev/null
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck %s --check-prefix=GFX10
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -filetype=obj %s | llvm-objdump -disassemble -mcpu=gfx1010 - | FileCheck %s --check-prefix=BIN
+// Mix of branches where only one needs the workaround. The s_cbranch_execz
+// further down would resolve to offset 0x3f and is expected to be relaxed;
+// the other branches keep their 4-byte form, and their expected offsets and
+// addresses account for the 4 extra bytes the relaxed branch introduces.
+ s_getpc_b64 s[0:1]
+ s_cbranch_vccnz BB0_2
+// GFX10: s_cbranch_vccnz BB0_2 ; encoding: [A,A,0x87,0xbf]
+// GFX10-NEXT: ; fixup A - offset: 0, value: BB0_2, kind: fixup_si_sopp_br
+// BIN: s_cbranch_vccnz BB0_2 // 000000000004: BF870061
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ v_nop
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_cbranch_vccnz BB0_1
+// GFX10: s_cbranch_vccnz BB0_1 ; encoding: [A,A,0x87,0xbf]
+// GFX10-NEXT: ; fixup A - offset: 0, value: BB0_1, kind: fixup_si_sopp_br
+// BIN: s_cbranch_vccnz BB0_1 // 000000000064: BF870041
+ s_nop 0
+ s_cbranch_execz BB0_3
+// GFX10: s_cbranch_execz BB0_3 ; encoding: [A,A,0x88,0xbf]
+// GFX10-NEXT: ; fixup A - offset: 0, value: BB0_3, kind: fixup_si_sopp_br
+// BIN: s_cbranch_execz BB0_3 // 00000000006C: BF880040
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_nop 0
+BB0_1:
+ s_nop 0
+BB0_3:
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ s_cbranch_vccnz BB0_2
+// GFX10: s_cbranch_vccnz BB0_2 ; encoding: [A,A,0x87,0xbf]
+// GFX10-NEXT: ; fixup A - offset: 0, value: BB0_2, kind: fixup_si_sopp_br
+// BIN: s_cbranch_vccnz BB0_2 // 00000000017C: BF870003
+ s_nop 0
+ s_nop 0
+ s_nop 0
+BB0_2:
+ s_nop 0
+ s_nop 0
+ s_endpgm