ScheduleHazardRecognizer::HazardType
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
MachineInstr *MI = SU->getInstr();
+ // A BUNDLE header carries no hazard itself; the bundled instructions are
+ // examined one-by-one (see processBundle), so report NoHazard here.
+ if (MI->isBundle())
+ return NoHazard;
if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
return NoopHazard;
return NoHazard;
}
+// Insert a single S_NOP (0 extra wait states) immediately before \p MI so the
+// nop becomes part of MI's bundle and pays for one required wait state.
+static void insertNoopInBundle(MachineInstr *MI, const SIInstrInfo &TII) {
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
+ .addImm(0);
+}
+
+void GCNHazardRecognizer::processBundle() {
+ // CurrCycleInstr is the BUNDLE header on entry; walk the instructions
+ // bundled under it until the end of the block or the end of the bundle.
+ MachineBasicBlock::instr_iterator MI = std::next(CurrCycleInstr->getIterator());
+ MachineBasicBlock::instr_iterator E = CurrCycleInstr->getParent()->instr_end();
+ // Check bundled MachineInstr's for hazards.
+ for (; MI != E && MI->isInsideBundle(); ++MI) {
+ CurrCycleInstr = &*MI;
+ unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr);
+
+ if (IsHazardRecognizerMode)
+ fixHazards(CurrCycleInstr);
+
+ // Pay for the remaining wait states by inserting S_NOPs into the bundle.
+ for (unsigned i = 0; i < WaitStates; ++i)
+ insertNoopInBundle(CurrCycleInstr, TII);
+
+ // It's unnecessary to track more than MaxLookAhead instructions. Since we
+ // include the bundled MI directly after, only add a maximum of
+ // (MaxLookAhead - 1) noops to EmittedInstrs.
+ for (unsigned i = 0, e = std::min(WaitStates, MaxLookAhead - 1); i < e; ++i)
+ EmittedInstrs.push_front(nullptr);
+
+ EmittedInstrs.push_front(CurrCycleInstr);
+ EmittedInstrs.resize(MaxLookAhead);
+ }
+ // The bundle has been fully consumed for this cycle.
+ CurrCycleInstr = nullptr;
+}
+
unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
IsHazardRecognizerMode = false;
return PreEmitNoopsCommon(SU->getInstr());
// NOTE(review): everything below this return is unreachable as shown, and
// `MI` is not declared in this scope. This reads like two PreEmitNoops
// overloads (SUnit* and MachineInstr*) whose separating lines were elided
// by the diff hunk — confirm against the full file before relying on it.
IsHazardRecognizerMode = true;
CurrCycleInstr = MI;
unsigned W = PreEmitNoopsCommon(MI);
-
- fixVMEMtoScalarWriteHazards(MI);
- fixSMEMtoVectorWriteHazards(MI);
- fixVcmpxExecWARHazard(MI);
- fixLdsBranchVmemWARHazard(MI);
-
+ fixHazards(MI);
CurrCycleInstr = nullptr;
return W;
}
unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
+ if (MI->isBundle())
+ return 0;
+
int WaitStates = std::max(0, checkAnyInstHazards(MI));
if (SIInstrInfo::isSMRD(*MI))
CurrCycleInstr->isKill())
return;
+ if (CurrCycleInstr->isBundle()) {
+ processBundle();
+ return;
+ }
+
unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
// Keep track of emitted instructions
int WaitStates,
IsExpiredFn IsExpired,
DenseSet<const MachineBasicBlock *> &Visited) {
+ for (auto E = MBB->instr_rend(); I != E; ++I) {
+ // Don't add WaitStates for parent BUNDLE instructions.
+ if (I->isBundle())
+ continue;
- for (auto E = MBB->rend() ; I != E; ++I) {
if (IsHazard(&*I))
return WaitStates;
// instructions in this group may return out of order and/or may be
// replayed (i.e. the same instruction issued more than once).
//
- // In order to handle these situations correctly we need to make sure
- // that when a clause has more than one instruction, no instruction in the
- // clause writes to a register that is read another instruction in the clause
+ // In order to handle these situations correctly we need to make sure that
+ // when a clause has more than one instruction, no instruction in the clause
+ // writes to a register that is read by another instruction in the clause
// (including itself). If we encounter this situation, we need to break the
// clause by inserting a non SMEM instruction.
// SGPR was written by a VALU Instruction.
const int VmemSgprWaitStates = 5;
auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
-
for (const MachineOperand &Use : VMEM->uses()) {
if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
continue;
SMovRelWaitStates);
}
+// Run every subtarget-specific hazard fixup over \p MI. Each fix* routine
+// performs its own applicability check (e.g. fixVMEMtoScalarWriteHazards
+// bails out unless ST.hasVMEMtoScalarWriteHazard()), so calling all of them
+// unconditionally is safe.
+void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
+ fixVMEMtoScalarWriteHazards(MI);
+ fixSMEMtoVectorWriteHazards(MI);
+ fixVcmpxExecWARHazard(MI);
+ fixLdsBranchVmemWARHazard(MI);
+}
+
bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
if (!ST.hasVMEMtoScalarWriteHazard())
return false;
void addClauseInst(const MachineInstr &MI);
+ // Advance over a MachineInstr bundle. Look for hazards in the bundled
+ // instructions.
+ void processBundle();
+
int getWaitStatesSince(IsHazardFn IsHazard, int Limit);
int getWaitStatesSinceDef(unsigned Reg, IsHazardFn IsHazardDef, int Limit);
int getWaitStatesSinceSetReg(IsHazardFn IsHazard, int Limit);
int checkAnyInstHazards(MachineInstr *MI);
int checkReadM0Hazards(MachineInstr *SMovRel);
int checkNSAtoVMEMHazard(MachineInstr *MI);
+
+ void fixHazards(MachineInstr *MI);
bool fixVMEMtoScalarWriteHazards(MachineInstr *MI);
bool fixSMEMtoVectorWriteHazards(MachineInstr *MI);
bool fixVcmpxExecWARHazard(MachineInstr *MI);
--- /dev/null
+# RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK,GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,NOXNACK,GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,NOXNACK,GFX10 %s
+
+# GCN-LABEL: name: break_smem_clause_simple_load_smrd8_ptr_hidden_bundle
+# GCN: }
+# XNACK-NEXT: S_NOP
+# NOXNACK-NOT: S_NOP
+# GCN: S_LOAD_DWORDX2_IMM
+---
+name: break_smem_clause_simple_load_smrd8_ptr_hidden_bundle
+body: |
+ bb.0:
+ BUNDLE implicit-def $sgpr6_sgpr7 {
+ $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
+ }
+ $sgpr14_sgpr15 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
+ S_ENDPGM 0
+...
+
+# GFX9-LABEL: name: hazard_precedes_bundle
+# GFX9: S_MOV_B32
+# GFX9-NEXT: S_NOP
+# GFX9: BUNDLE
+# GFX9-NEXT: S_NOP
+---
+name: hazard_precedes_bundle
+body: |
+ bb.0:
+ $m0 = S_MOV_B32 $sgpr7
+ S_SENDMSG 3, implicit $exec, implicit $m0
+ $m0 = S_MOV_B32 $sgpr8
+ BUNDLE implicit-def $vgpr0 {
+ $vgpr0 = V_INTERP_P1_F32 killed $vgpr4, 0, 0, implicit $m0, implicit $exec
+ }
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: vmem_vcc_hazard_ignore_bundle_instr
+# GCN: S_LOAD_DWORDX2_IMM
+# GCN-NEXT: }
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN: BUFFER_LOAD_DWORD_OFFEN
+---
+name: vmem_vcc_hazard_ignore_bundle_instr
+body: |
+ bb.0:
+ $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+ $vgpr0 = IMPLICIT_DEF
+ BUNDLE implicit-def $vgpr1, implicit $vgpr0, implicit $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec {
+ $vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+ }
+ BUNDLE implicit-def $sgpr0_sgpr1, implicit $sgpr10_sgpr11 {
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
+ }
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: vmem_vcc_min_of_two_after_bundle
+# GCN: bb.2:
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: vmem_vcc_min_of_two_after_bundle
+body: |
+ bb.0:
+ successors: %bb.2
+
+ BUNDLE implicit-def $vgpr1, implicit $vgpr0 {
+ $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+ $vgpr0 = IMPLICIT_DEF
+ $vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+ }
+ S_NOP 0
+ S_BRANCH %bb.2
+
+ bb.1:
+ successors: %bb.2
+
+ BUNDLE implicit-def $vgpr1, implicit $vgpr0 {
+ $vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+ }
+
+ bb.2:
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
+...
--- /dev/null
+# RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK,GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,NOXNACK,GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,NOXNACK,GFX10 %s
+
+# GCN-LABEL: name: break_smem_clause_max_look_ahead_in_bundle
+# GCN: S_LOAD_DWORDX2_IMM
+# XNACK-NEXT: S_NOP
+# NOXNACK-NOT: S_NOP
+# GCN: S_LOAD_DWORDX2
+# XNACK-NEXT: S_NOP
+# NOXNACK-NOT: S_NOP
+# GCN: }
+---
+name: break_smem_clause_max_look_ahead_in_bundle
+body: |
+ bb.0:
+ BUNDLE implicit-def $sgpr6_sgpr7 {
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
+ S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0, 0
+ S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 4, 0, 0
+ S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 8, 0, 0
+ S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 12, 0, 0
+ S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 16, 0, 0
+ $sgpr14_sgpr15 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
+ $sgpr16_sgpr17 = S_LOAD_DWORDX2_IMM $sgpr14_sgpr15, 0, 0, 0
+ }
+ S_ENDPGM 0
+...
+
+# GFX10-LABEL: name: hazard_smem_war_in_bundle
+# GFX10: S_LOAD_DWORD_IMM
+# GFX10-NEXT: $sgpr_null = S_MOV_B32 0
+# GFX10: V_CMP_EQ_F32
+---
+name: hazard_smem_war_in_bundle
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
+ BUNDLE implicit-def $sgpr0_sgpr1 {
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+ }
+ S_ENDPGM 0
+...
+
+# GFX9-LABEL: name: hazard_ignore_dbg_label_in_bundle
+# GFX9: DBG_LABEL 6
+# GFX9-NEXT: S_NOP 0
+# GFX9: S_SENDMSG 3, implicit $exec, implicit $m0
+---
+name: hazard_ignore_dbg_label_in_bundle
+body: |
+ bb.0:
+ BUNDLE {
+ $m0 = S_MOV_B32 killed $sgpr12
+ DBG_LABEL 0
+ DBG_LABEL 1
+ DBG_LABEL 2
+ DBG_LABEL 3
+ DBG_LABEL 4
+ DBG_LABEL 5
+ DBG_LABEL 6
+ S_SENDMSG 3, implicit $exec, implicit $m0
+ }
+ S_ENDPGM 0
+...