From: Nicolai Haehnle
Date: Thu, 27 Jun 2019 16:56:44 +0000 (+0000)
Subject: AMDGPU: Make fixing i1 copies robust against re-ordering
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c71a1c39025ae135543eb6c2130e8f628b380dbe;p=llvm

AMDGPU: Make fixing i1 copies robust against re-ordering

Summary:
The new test case led to incorrect code.

Change-Id: Ief48b227e97aa662dd3535c9bafb27d4a184efca

Reviewers: arsenm, david-salinas

Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D63871

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364566 91177308-0d34-0410-b5e6-96231b3b80d8
---
diff --git a/lib/Target/AMDGPU/SILowerI1Copies.cpp b/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 9a7f6a8f242..1c0f836f07e 100644
--- a/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -95,6 +95,11 @@ private:
   MachineBasicBlock::iterator
   getSaluInsertionAtEnd(MachineBasicBlock &MBB) const;
 
+  bool isVreg1(unsigned Reg) const {
+    return TargetRegisterInfo::isVirtualRegister(Reg) &&
+           MRI->getRegClass(Reg) == &AMDGPU::VReg_1RegClass;
+  }
+
   bool isLaneMaskReg(unsigned Reg) const {
     return TII->getRegisterInfo().isSGPRReg(*MRI, Reg) &&
            TII->getRegisterInfo().getRegSizeInBits(Reg, *MRI) ==
@@ -494,13 +499,10 @@ void SILowerI1Copies::lowerCopiesFromI1() {
 
       unsigned DstReg = MI.getOperand(0).getReg();
       unsigned SrcReg = MI.getOperand(1).getReg();
-      if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
-          MRI->getRegClass(SrcReg) != &AMDGPU::VReg_1RegClass)
+      if (!isVreg1(SrcReg))
        continue;
 
-      if (isLaneMaskReg(DstReg) ||
-          (TargetRegisterInfo::isVirtualRegister(DstReg) &&
-           MRI->getRegClass(DstReg) == &AMDGPU::VReg_1RegClass))
+      if (isLaneMaskReg(DstReg) || isVreg1(DstReg))
        continue;
 
       // Copy into a 32-bit vector register.
@@ -543,7 +545,7 @@ void SILowerI1Copies::lowerPhis() {
 
     for (MachineInstr &MI : MBB.phis()) {
       unsigned DstReg = MI.getOperand(0).getReg();
-      if (MRI->getRegClass(DstReg) != &AMDGPU::VReg_1RegClass)
+      if (!isVreg1(DstReg))
        continue;
 
       LLVM_DEBUG(dbgs() << "Lower PHI: " << MI);
@@ -560,7 +562,7 @@ void SILowerI1Copies::lowerPhis() {
 
         if (IncomingDef->getOpcode() == AMDGPU::COPY) {
           IncomingReg = IncomingDef->getOperand(1).getReg();
-          assert(isLaneMaskReg(IncomingReg));
+          assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg));
           assert(!IncomingDef->getOperand(1).getSubReg());
         } else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) {
           continue;
@@ -668,8 +670,7 @@ void SILowerI1Copies::lowerCopiesToI1() {
         continue;
 
       unsigned DstReg = MI.getOperand(0).getReg();
-      if (!TargetRegisterInfo::isVirtualRegister(DstReg) ||
-          MRI->getRegClass(DstReg) != &AMDGPU::VReg_1RegClass)
+      if (!isVreg1(DstReg))
        continue;
 
       if (MRI->use_empty(DstReg)) {
@@ -689,7 +690,7 @@ void SILowerI1Copies::lowerCopiesToI1() {
       assert(!MI.getOperand(1).getSubReg());
 
       if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
-          !isLaneMaskReg(SrcReg)) {
+          (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) {
         assert(TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32);
         unsigned TmpReg = createLaneMaskReg(*MF);
         BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64), TmpReg)
diff --git a/test/CodeGen/AMDGPU/i1-copies-rpo.mir b/test/CodeGen/AMDGPU/i1-copies-rpo.mir
new file mode 100644
index 00000000000..4a858c0d744
--- /dev/null
+++ b/test/CodeGen/AMDGPU/i1-copies-rpo.mir
@@ -0,0 +1,51 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-i1-copies -o - %s | FileCheck %s
+
+# The strange block ordering visits the use before the def.
+---
+name: inserted_cmp_operand_class_rpo
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+body: |
+  ; CHECK-LABEL: name: inserted_cmp_operand_class_rpo
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.3(0x80000000)
+  ; CHECK: S_BRANCH %bb.3
+  ; CHECK: bb.1:
+  ; CHECK: successors: %bb.2(0x80000000)
+  ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY %1
+  ; CHECK: bb.2:
+  ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[COPY]]
+  ; CHECK: S_ENDPGM 0
+  ; CHECK: bb.3:
+  ; CHECK: successors: %bb.1(0x80000000)
+  ; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0
+  ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MOV_B32_e32_1]], killed [[S_MOV_B32_]], implicit $exec
+  ; CHECK: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[V_CMP_EQ_U32_e64_]]
+  ; CHECK: S_BRANCH %bb.1
+  bb.0:
+    successors: %bb.3
+
+    S_BRANCH %bb.3
+
+  bb.1:
+    successors: %bb.2
+
+    %0:vreg_1 = COPY %1
+
+  bb.2:
+    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %3:sreg_64_xexec = COPY %0
+    S_ENDPGM 0
+
+  bb.3:
+    successors: %bb.1
+
+    %4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %5:sreg_32_xm0 = S_MOV_B32 0
+    %6:sreg_64 = V_CMP_EQ_U32_e64 killed %4, killed %5, implicit $exec
+    %1:vreg_1 = COPY %6
+    S_BRANCH %bb.1
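As a standalone illustration of the ordering hazard the test exercises (a hypothetical toy model in plain C++, not part of this change and not the LLVM sources; the register names %0, %1 and %6 are borrowed from the MIR test above): when blocks are processed in layout order, the copy in bb.1 reads %1 before the copy in bb.3 that defines it has been rewritten, so at that point the source still carries the placeholder vreg_1 class, and the checks in the pass now accept that case alongside an already-lowered lane mask.

// toy_i1_lowering.cpp -- hypothetical sketch, not the patch above.
// Build: c++ -std=c++17 toy_i1_lowering.cpp && ./a.out
#include <cassert>
#include <cstdio>
#include <map>
#include <string>
#include <vector>

// A register is either the i1 placeholder class (vreg_1), an already
// lowered lane mask, or something unrelated.
enum class RegClass { VReg1, LaneMask, Other };

struct Copy {
  std::string Dst, Src;
};

int main() {
  // Classes before lowering: %0 and %1 are the i1 placeholders, %6 is the
  // lane mask produced by V_CMP_EQ_U32 in bb.3 of the MIR test.
  std::map<std::string, RegClass> Classes = {
      {"%0", RegClass::VReg1},
      {"%1", RegClass::VReg1},
      {"%6", RegClass::LaneMask}};

  // i1-related copies per block, in the layout order of the test: the use
  // of %1 in bb.1 comes before its def in bb.3.
  std::vector<std::vector<Copy>> Blocks = {
      {},              // bb.0: only a branch
      {{"%0", "%1"}},  // bb.1: %0:vreg_1 = COPY %1   (use before def)
      {},              // bb.2: reads %0, nothing to lower in this toy
      {{"%1", "%6"}},  // bb.3: %1:vreg_1 = COPY %6   (def)
  };

  auto isVreg1 = [&](const std::string &R) { return Classes[R] == RegClass::VReg1; };
  auto isLaneMaskReg = [&](const std::string &R) { return Classes[R] == RegClass::LaneMask; };

  for (std::size_t BB = 0; BB < Blocks.size(); ++BB) {
    for (const Copy &C : Blocks[BB]) {
      if (!isVreg1(C.Dst))
        continue;
      // The relaxed check from the patch: a source that is still vreg_1 is
      // acceptable, it simply has not been visited yet.  Insisting on an
      // already-lowered lane mask here is what the patch relaxes.
      assert(isLaneMaskReg(C.Src) || isVreg1(C.Src));
      Classes[C.Dst] = RegClass::LaneMask;  // rewrite the destination's class
      std::printf("bb.%zu: lowered %s = COPY %s\n", BB, C.Dst.c_str(), C.Src.c_str());
    }
  }
  return 0;
}

With the stricter pre-patch check (lane mask only), bb.1 would be mishandled because %1 still looks like an unlowered placeholder; with the relaxed check the copy in bb.1 is accepted and the definition in bb.3 is rewritten when its block is reached, mirroring what the new isVreg1() helper enables in SILowerI1Copies.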