From: Matt Arsenault Date: Mon, 1 Jul 2019 13:22:07 +0000 (+0000) Subject: AMDGPU/GlobalISel: Fix scc->vcc copy handling X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=23b47db4b09eeaf3865698d574ebc3bab6e56e27;p=llvm AMDGPU/GlobalISel: Fix scc->vcc copy handling This was checking the size of the register with the value of the size, which happens to be exec. Also fix assuming VCC is 64-bit to fix wave32. Also remove some untested handling for physical registers which is skipped. This doesn't insert the V_CNDMASK_B32 if SCC is the physical copy source. I'm not sure if this should be trying to handle this special case instead of dealing with this in copyPhysReg. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364761 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index c7237e42571..ce45eab202d 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -60,11 +60,7 @@ AMDGPUInstructionSelector::AMDGPUInstructionSelector( const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; } static bool isSCC(unsigned Reg, const MachineRegisterInfo &MRI) { - if (Reg == AMDGPU::SCC) - return true; - - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - return false; + assert(!TargetRegisterInfo::isPhysicalRegister(Reg)); auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); const TargetRegisterClass *RC = @@ -77,6 +73,22 @@ static bool isSCC(unsigned Reg, const MachineRegisterInfo &MRI) { return RB->getID() == AMDGPU::SCCRegBankID; } +static bool isVCC(unsigned Reg, const MachineRegisterInfo &MRI, + const SIRegisterInfo &TRI) { + assert(!TargetRegisterInfo::isPhysicalRegister(Reg)); + + auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); + const TargetRegisterClass *RC = + RegClassOrBank.dyn_cast<const TargetRegisterClass*>(); + if (RC) { + return RC == TRI.getWaveMaskRegClass() && 
MRI.getType(Reg).getSizeInBits() == 1; + } + + const RegisterBank *RB = RegClassOrBank.get<const RegisterBank*>(); + return RB->getID() == AMDGPU::VCCRegBankID; +} + bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); MachineFunction *MF = BB->getParent(); @@ -88,14 +100,12 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const { const MachineOperand &Src = I.getOperand(1); unsigned SrcReg = Src.getReg(); if (!TargetRegisterInfo::isPhysicalRegister(SrcReg) && isSCC(SrcReg, MRI)) { - unsigned DstReg = TRI.getRegSizeInBits(I.getOperand(0).getReg(), MRI); - unsigned DstSize = TRI.getRegSizeInBits(DstReg, MRI); + unsigned DstReg = I.getOperand(0).getReg(); - // We have a copy from a 32-bit to 64-bit register. This happens - // when we are selecting scc->vcc copies. - if (DstSize == 64) { + // Specially handle scc->vcc copies. + if (isVCC(DstReg, MRI, TRI)) { const DebugLoc &DL = I.getDebugLoc(); - BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), I.getOperand(0).getReg()) + BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg) .addImm(0) .addReg(SrcReg); if (!MRI.getRegClassOrNull(SrcReg)) diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index 3579c2f92d2..8181afc9a10 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1690,8 +1690,8 @@ SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size, case AMDGPU::VGPRRegBankID: return &AMDGPU::VGPR_32RegClass; case AMDGPU::VCCRegBankID: - // TODO: Check wavesize - return &AMDGPU::SReg_64_XEXECRegClass; + return isWave32 ? 
+ &AMDGPU::SReg_32_XM0_XEXECRegClass : &AMDGPU::SReg_64_XEXECRegClass; case AMDGPU::SGPRRegBankID: return &AMDGPU::SReg_32_XM0RegClass; case AMDGPU::SCCRegBankID: diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir index 9a5931fcd96..2cf975da5f1 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN +# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel -o - %s | FileCheck -check-prefix=WAVE64 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -run-pass=instruction-select -verify-machineinstrs -global-isel -o - %s | FileCheck -check-prefix=WAVE32 %s --- @@ -11,11 +12,18 @@ regBankSelected: true body: | bb.0: liveins: $sgpr2_sgpr3 - ; GCN-LABEL: name: copy - ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3 - ; GCN: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]] - ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + + ; WAVE64-LABEL: name: copy + ; WAVE64: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3 + ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]] + ; WAVE64: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; WAVE64: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; WAVE32-LABEL: name: copy + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3 + ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]] + ; WAVE32: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; WAVE32: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr %0:sgpr(p1) = COPY 
$sgpr2_sgpr3 %1:vgpr(p1) = COPY %0 %2:vgpr(s32) = G_IMPLICIT_DEF @@ -23,21 +31,31 @@ body: | ... --- -name: copy_vcc_scc +name: copy_vcc_bank_scc_bank legalized: true regBankSelected: true body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc - ; GCN-LABEL: name: copy_vcc_scc - ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc - ; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec - ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; GCN: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + + ; WAVE64-LABEL: name: copy_vcc_bank_scc_bank + ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; WAVE64: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc + ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec + ; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec + ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; WAVE32-LABEL: name: copy_vcc_bank_scc_bank + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; WAVE32: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc + ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec + ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec + ; WAVE32: 
FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -48,23 +66,35 @@ body: | ... --- -name: copy_vcc_scc_2_uses +name: copy_vcc_bank_scc_bank_2_uses legalized: true regBankSelected: true body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc - ; GCN-LABEL: name: copy_vcc_scc_2_uses - ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc - ; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec - ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; GCN: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec - ; GCN: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec - ; GCN: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + + ; WAVE64-LABEL: name: copy_vcc_bank_scc_bank_2_uses + ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; WAVE64: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc + ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec + ; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec + ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec + ; WAVE64: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec + ; WAVE64: 
FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; WAVE32-LABEL: name: copy_vcc_bank_scc_bank_2_uses + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; WAVE32: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc + ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec + ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec + ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec + ; WAVE32: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec + ; WAVE32: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -75,4 +105,36 @@ body: | %7:vgpr(s32) = G_SELECT %6, %1, %5 G_STORE %7, %0 :: (store 4, addrspace 1) ... 
+ --- + +name: copy_vcc_bank_scc_physreg +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc + + ; WAVE64-LABEL: name: copy_vcc_bank_scc_physreg + ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; WAVE64: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY $scc + ; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec + ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; WAVE32-LABEL: name: copy_vcc_bank_scc_physreg + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; WAVE32: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc + ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec + ; WAVE32: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = COPY $vgpr2 + %2:vgpr(s32) = COPY $vgpr3 + %3:vcc(s1) = COPY $scc + %5:vgpr(s32) = G_SELECT %3, %1, %2 + G_STORE %5, %0 :: (store 4, addrspace 1) +...