}
}
+/// Return true if \p MBB has one successor immediately following, and is its
+/// only predecessor
+static bool hasTrivialSuccessor(const MachineBasicBlock &MBB) {
+ if (MBB.succ_size() != 1)
+ return false;
+
+ const MachineBasicBlock *Succ = *MBB.succ_begin();
+ return (Succ->pred_size() == 1) && MBB.isLayoutSuccessor(Succ);
+}
+
// FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
// around other non-memory instructions.
bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
EndPgmBlocks.push_back(&MBB);
}
- // Wait for everything at the end of the MBB
- Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
+ // Wait for everything at the end of the MBB. If there is only one
+ // successor, we can defer this until the uses there.
+ if (!hasTrivialSuccessor(MBB))
+ Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
}
if (HaveScalarStores) {
<4 x i32> addrspace(4)* %flat16) {
ret void
}
+
+ define void @single_fallthrough_successor_no_end_block_wait() {
+ ret void
+ }
+
+ define void @single_branch_successor_not_next_block() {
+ ret void
+ }
+
...
---
# CHECK-LABEL: bb.1:
# CHECK: FLAT_LOAD_DWORD
+# CHECK: S_WAITCNT 3952
# CHECK: FLAT_LOAD_DWORDX4
# The first load has no mem operand, so we should assume it accesses the flat
# address space.
# s_waitcnt vmcnt(0) lgkmcnt(0)
-# CHECK-NEXT: S_WAITCNT 112
+# CHECK-NEXT: S_WAITCNT 127
# CHECK-LABEL: bb.2:
# CHECK: FLAT_LOAD_DWORD
+# CHECK: S_WAITCNT 3952
# CHECK: FLAT_LOAD_DWORDX4
+
# One outstand loads access the flat address space.
# s_waitcnt vmcnt(0) lgkmcnt(0)
-# CHECK-NEXT: S_WAITCNT 112
+# CHECK-NEXT: S_WAITCNT 127
name: flat_zero_waitcnt
%vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec
S_ENDPGM
...
+---
+# There is only a single fallthrough successor block, so there's no
+# need to wait immediately.
+
+# CHECK-LABEL: name: single_fallthrough_successor_no_end_block_wait
+# CHECK: %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2
+# CHECK-NOT: S_WAITCNT
+
+# CHECK: bb.1:
+# CHECK-NEXT: V_LSHLREV_B64
+# CHECK-NEXT: S_WAITCNT 112
+# CHECK-NEXT: FLAT_STORE_DWORD
+name: single_fallthrough_successor_no_end_block_wait
+
+body: |
+ bb.0:
+ successors: %bb.1
+ %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr
+
+ bb.1:
+ %vgpr3_vgpr4 = V_LSHLREV_B64 4, %vgpr7_vgpr8, implicit %exec
+ FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr
+ S_ENDPGM
+...
+---
+# The block has a single predecessor with a single successor, but it
+# is not the next block so it's non-obvious that the wait is not needed.
+
+
+# CHECK-LABEL: name: single_branch_successor_not_next_block
+# CHECK: %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2
+# CHECK-NEXT: S_WAITCNT 112
+
+# CHECK: bb.1
+# CHECK-NEXT: FLAT_STORE_DWORD
+# CHECK-NEXT: S_ENDPGM
+
+# CHECK: bb.2:
+# CHECK-NEXT: V_LSHLREV_B64
+# CHECK-NEXT: FLAT_STORE_DWORD
+name: single_branch_successor_not_next_block
+
+body: |
+ bb.0:
+ successors: %bb.2
+ %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr
+ S_BRANCH %bb.2
+
+ bb.1:
+ FLAT_STORE_DWORD %vgpr8_vgpr9, %vgpr10, 0, 0, 0, implicit %exec, implicit %flat_scr
+ S_ENDPGM
+
+ bb.2:
+ %vgpr3_vgpr4 = V_LSHLREV_B64 4, %vgpr7_vgpr8, implicit %exec
+ FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr
+ S_ENDPGM
+...