#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
namespace {
class SIFixSGPRCopies : public MachineFunctionPass {
+
+ MachineDominatorTree *MDT;
+
public:
static char ID;
StringRef getPassName() const override { return "SI Fix SGPR copies"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
} // End anonymous namespace
-INITIALIZE_PASS(SIFixSGPRCopies, DEBUG_TYPE,
- "SI Fix SGPR copies", false, false)
+INITIALIZE_PASS_BEGIN(SIFixSGPRCopies, DEBUG_TYPE,
+ "SI Fix SGPR copies", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_END(SIFixSGPRCopies, DEBUG_TYPE,
+ "SI Fix SGPR copies", false, false)
+
char SIFixSGPRCopies::ID = 0;
return false;
}
+static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
+ const TargetRegisterInfo &TRI) {
+ for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(),
+ E = MBB.end(); I != E; ++I) {
+ if (I->modifiesRegister(AMDGPU::EXEC, &TRI))
+ return true;
+ }
+ return false;
+}
+
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
MachineRegisterInfo &MRI = MF.getRegInfo();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
const SIInstrInfo *TII = ST.getInstrInfo();
+ MDT = &getAnalysis<MachineDominatorTree>();
SmallVector<MachineInstr *, 16> Worklist;
break;
}
case AMDGPU::PHI: {
- DEBUG(dbgs() << "Fixing PHI: " << MI);
unsigned Reg = MI.getOperand(0).getReg();
if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
break;
+ // We don't need to fix the PHI if the common dominator of the
+ // two incoming blocks terminates with a uniform branch.
+ if (MI.getNumExplicitOperands() == 5) {
+ MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
+ MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
+
+ MachineBasicBlock *NCD = MDT->findNearestCommonDominator(MBB0, MBB1);
+ if (NCD && !hasTerminatorThatModifiesExec(*NCD, *TRI)) {
+ DEBUG(dbgs() << "Not fixing PHI for uniform branch: " << MI << '\n');
+ break;
+ }
+ }
+
// If a PHI node defines an SGPR and any of its operands are VGPRs,
// then we need to move it to the VALU.
//
// ...
// use sgpr2
//
- // FIXME: This is OK if the branching decision is made based on an
- // SGPR value.
- bool SGPRBranch = false;
-
// The one exception to this rule is when one of the operands
// is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
// instruction. In this case, there we know the program will
SmallSet<unsigned, 8> Visited;
if (phiHasVGPROperands(MI, MRI, TRI, TII) ||
- (!SGPRBranch && !phiHasBreakDef(MI, MRI, Visited))) {
+ !phiHasBreakDef(MI, MRI, Visited)) {
+ DEBUG(dbgs() << "Fixing PHI: " << MI);
TII->moveToVALU(MI);
}
break;
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -march=amdgcn -mcpu=verde -machine-sink-split-probability-threshold=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -machine-sink-split-probability-threshold=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; GCN-LABEL: {{^}}uniform_if_scc:
; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0
-; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
+; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
; GCN: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
; Fall-through to the else
-; GCN: v_mov_b32_e32 [[STORE_VAL]], 1
+; GCN: s_mov_b32 [[S_VAL]], 1
; GCN: [[IF_LABEL]]:
-; GCN: buffer_store_dword [[STORE_VAL]]
+; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
+; GCN: buffer_store_dword [[V_VAL]]
define void @uniform_if_scc(i32 %cond, i32 addrspace(1)* %out) {
entry:
%cmp0 = icmp eq i32 %cond, 0
}
; GCN-LABEL: {{^}}uniform_if_vcc:
-; FIXME: We could use _e32 here if we re-used the 0 from [[STORE_VAL]], and
-; also scheduled the write first.
; GCN-DAG: v_cmp_eq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
-; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
+; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
; Fall-through to the else
-; GCN: v_mov_b32_e32 [[STORE_VAL]], 1
+; GCN: s_mov_b32 [[S_VAL]], 1
; GCN: [[IF_LABEL]]:
-; GCN: buffer_store_dword [[STORE_VAL]]
+; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
+; GCN: buffer_store_dword [[V_VAL]]
define void @uniform_if_vcc(float %cond, i32 addrspace(1)* %out) {
entry:
%cmp0 = fcmp oeq float %cond, 0.0
; GCN-LABEL: {{^}}uniform_if_swap_br_targets_scc:
; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
-; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
+; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
; GCN: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
; Fall-through to the else
-; GCN: v_mov_b32_e32 [[STORE_VAL]], 1
+; GCN: s_mov_b32 [[S_VAL]], 1
; GCN: [[IF_LABEL]]:
-; GCN: buffer_store_dword [[STORE_VAL]]
+; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
+; GCN: buffer_store_dword [[V_VAL]]
define void @uniform_if_swap_br_targets_scc(i32 %cond, i32 addrspace(1)* %out) {
entry:
%cmp0 = icmp eq i32 %cond, 0
}
; GCN-LABEL: {{^}}uniform_if_swap_br_targets_vcc:
-; FIXME: We could use _e32 here if we re-used the 0 from [[STORE_VAL]], and
-; also scheduled the write first.
; GCN-DAG: v_cmp_neq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
-; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
+; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
; Fall-through to the else
-; GCN: v_mov_b32_e32 [[STORE_VAL]], 1
+; GCN: s_mov_b32 [[S_VAL]], 1
; GCN: [[IF_LABEL]]:
-; GCN: buffer_store_dword [[STORE_VAL]]
+; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
+; GCN: buffer_store_dword [[V_VAL]]
define void @uniform_if_swap_br_targets_vcc(float %cond, i32 addrspace(1)* %out) {
entry:
%cmp0 = fcmp oeq float %cond, 0.0
ret void
}
-; GCN-LABEL: {{^}}uniform_loop:
-; GCN: {{^}}[[LOOP_LABEL:[A-Z0-9_a-z]+]]:
-; FIXME: We need to teach GCNFixSGPRCopies about uniform branches so we
-; get s_add_i32 here.
-; GCN: v_add_i32_e32 [[I:v[0-9]+]], vcc, -1, v{{[0-9]+}}
-; GCN: v_cmp_ne_u32_e32 vcc, 0, [[I]]
-; GCN: s_and_b64 vcc, exec, vcc
-; GCN: s_cbranch_vccnz [[LOOP_LABEL]]
-; GCN: s_endpgm
+; SI-LABEL: {{^}}uniform_loop:
+; SI: {{^}}[[LOOP_LABEL:[A-Z0-9_a-z]+]]:
+; SI: s_add_i32 [[I:s[0-9]+]], s{{[0-9]+}}, -1
+; SI: s_cmp_lg_u32 [[I]], 0
+; SI: s_cbranch_scc1 [[LOOP_LABEL]]
+; SI: s_endpgm
define void @uniform_loop(i32 addrspace(1)* %out, i32 %a) {
entry:
br label %loop
; GCN-LABEL: {{^}}uniform_if_scc_i64_eq:
; VI-DAG: s_cmp_eq_u64 s{{\[[0-9]+:[0-9]+\]}}, 0
-; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
+; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
; SI: v_cmp_eq_u64_e64
; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
; VI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
; Fall-through to the else
-; GCN: v_mov_b32_e32 [[STORE_VAL]], 1
+; GCN: s_mov_b32 [[S_VAL]], 1
; GCN: [[IF_LABEL]]:
-; GCN: buffer_store_dword [[STORE_VAL]]
+; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
+; GCN: buffer_store_dword [[V_VAL]]
define void @uniform_if_scc_i64_eq(i64 %cond, i32 addrspace(1)* %out) {
entry:
%cmp0 = icmp eq i64 %cond, 0
; GCN-LABEL: {{^}}uniform_if_scc_i64_ne:
; VI-DAG: s_cmp_lg_u64 s{{\[[0-9]+:[0-9]+\]}}, 0
-; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
+; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
; SI: v_cmp_ne_u64_e64
; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
; VI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
; Fall-through to the else
-; GCN: v_mov_b32_e32 [[STORE_VAL]], 1
+; GCN: s_mov_b32 [[S_VAL]], 1
; GCN: [[IF_LABEL]]:
-; GCN: buffer_store_dword [[STORE_VAL]]
+; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
+; GCN: buffer_store_dword [[V_VAL]]
define void @uniform_if_scc_i64_ne(i64 %cond, i32 addrspace(1)* %out) {
entry:
%cmp0 = icmp ne i64 %cond, 0
}
; GCN-LABEL: {{^}}uniform_if_scc_i64_sgt:
+; GCN: s_mov_b32 [[S_VAL:s[0-9]+]], 0
; GCN: v_cmp_gt_i64_e64
; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
; Fall-through to the else
-; GCN: v_mov_b32_e32 [[STORE_VAL]], 1
+; GCN: s_mov_b32 [[S_VAL]], 1
; GCN: [[IF_LABEL]]:
-; GCN: buffer_store_dword [[STORE_VAL]]
+; GCN: v_mov_b32_e32 [[V_VAL]], [[S_VAL]]
+; GCN: buffer_store_dword [[V_VAL]]
define void @uniform_if_scc_i64_sgt(i64 %cond, i32 addrspace(1)* %out) {
entry:
%cmp0 = icmp sgt i64 %cond, 0