  MachineBasicBlock::iterator
  getSaluInsertionAtEnd(MachineBasicBlock &MBB) const;

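+  // Returns true if Reg is a virtual register of the VReg_1 class, the
+  // placeholder register class this pass gives i1 values before they are
+  // lowered to lane masks.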
+  bool isVreg1(unsigned Reg) const {
+    return TargetRegisterInfo::isVirtualRegister(Reg) &&
+           MRI->getRegClass(Reg) == &AMDGPU::VReg_1RegClass;
+  }
+
  bool isLaneMaskReg(unsigned Reg) const {
    return TII->getRegisterInfo().isSGPRReg(*MRI, Reg) &&
           TII->getRegisterInfo().getRegSizeInBits(Reg, *MRI) ==
      unsigned DstReg = MI.getOperand(0).getReg();
      unsigned SrcReg = MI.getOperand(1).getReg();
-      if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
-          MRI->getRegClass(SrcReg) != &AMDGPU::VReg_1RegClass)
+      if (!isVreg1(SrcReg))
        continue;

-      if (isLaneMaskReg(DstReg) ||
-          (TargetRegisterInfo::isVirtualRegister(DstReg) &&
-           MRI->getRegClass(DstReg) == &AMDGPU::VReg_1RegClass))
+      if (isLaneMaskReg(DstReg) || isVreg1(DstReg))
        continue;

      // Copy into a 32-bit vector register.
    for (MachineInstr &MI : MBB.phis()) {
      unsigned DstReg = MI.getOperand(0).getReg();
-      if (MRI->getRegClass(DstReg) != &AMDGPU::VReg_1RegClass)
+      if (!isVreg1(DstReg))
        continue;

      LLVM_DEBUG(dbgs() << "Lower PHI: " << MI);
        if (IncomingDef->getOpcode() == AMDGPU::COPY) {
          IncomingReg = IncomingDef->getOperand(1).getReg();
-          assert(isLaneMaskReg(IncomingReg));
+          assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg));
          assert(!IncomingDef->getOperand(1).getSubReg());
        } else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) {
          continue;
        continue;

      unsigned DstReg = MI.getOperand(0).getReg();
-      if (!TargetRegisterInfo::isVirtualRegister(DstReg) ||
-          MRI->getRegClass(DstReg) != &AMDGPU::VReg_1RegClass)
+      if (!isVreg1(DstReg))
        continue;

      if (MRI->use_empty(DstReg)) {
      assert(!MI.getOperand(1).getSubReg());

      if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
-          !isLaneMaskReg(SrcReg)) {
+          (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) {
        assert(TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32);
        unsigned TmpReg = createLaneMaskReg(*MF);
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64), TmpReg)
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-i1-copies -o - %s | FileCheck %s
+
+# The strange block ordering visits the use before the def.
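+# %1 is only defined in bb.3, so the reverse post-order walk reaches the use
+# in bb.1 before the def. The copy %0 = COPY %1 is a vreg_1-to-vreg_1 copy;
+# without the isVreg1 checks in lowerCopiesToI1, the pass would treat the
+# not-yet-lowered vreg_1 source as a plain 32-bit value and insert a
+# V_CMP_NE_U32_e64 whose operand would keep the vreg_1 register class. With
+# the fix, the copy is lowered directly to a lane-mask (sreg_64) copy, as
+# the first COPY in the checks below verifies.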
+---
+name: inserted_cmp_operand_class_rpo
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+body: |
+  ; CHECK-LABEL: name: inserted_cmp_operand_class_rpo
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   S_BRANCH %bb.3
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   [[COPY:%[0-9]+]]:sreg_64 = COPY %1
+  ; CHECK: bb.2:
+  ; CHECK:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[COPY]]
+  ; CHECK:   S_ENDPGM 0
+  ; CHECK: bb.3:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK:   [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0
+  ; CHECK:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MOV_B32_e32_1]], killed [[S_MOV_B32_]], implicit $exec
+  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_64 = COPY [[V_CMP_EQ_U32_e64_]]
+  ; CHECK:   S_BRANCH %bb.1
+  bb.0:
+    successors: %bb.3
+
+    S_BRANCH %bb.3
+
+  bb.1:
+    successors: %bb.2
+
+    %0:vreg_1 = COPY %1
+
+  bb.2:
+    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %3:sreg_64_xexec = COPY %0
+    S_ENDPGM 0
+
+  bb.3:
+    successors: %bb.1
+
+    %4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %5:sreg_32_xm0 = S_MOV_B32 0
+    %6:sreg_64 = V_CMP_EQ_U32_e64 killed %4, killed %5, implicit $exec
+    %1:vreg_1 = COPY %6
+    S_BRANCH %bb.1