From 0774fb325eb3867f039ba1e7513ce8125d36babf Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 15 Jul 2019 19:44:07 +0000 Subject: [PATCH] AMDGPU/GlobalISel: Fix handling of sgpr (not scc bank) s1 to VCC This was emitting a copy from a 32-bit register to a 64-bit register. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366117 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../AMDGPU/AMDGPUInstructionSelector.cpp | 40 ++++++++++------- .../AMDGPU/GlobalISel/inst-select-copy.mir | 45 +++++++++++++++---- 2 files changed, 59 insertions(+), 26 deletions(-) diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 5a73b0d1139..0d02e738b4b 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -86,8 +86,9 @@ bool AMDGPUInstructionSelector::isVCC(Register Reg, const TargetRegisterClass *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass*>(); if (RC) { + const LLT Ty = MRI.getType(Reg); return RC->hasSuperClassEq(TRI.getBoolRC()) && - MRI.getType(Reg).getSizeInBits() == 1; + Ty.isValid() && Ty.getSizeInBits() == 1; } const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>(); @@ -95,29 +96,34 @@ bool AMDGPUInstructionSelector::isVCC(Register Reg, } bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const { + const DebugLoc &DL = I.getDebugLoc(); MachineBasicBlock *BB = I.getParent(); MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); I.setDesc(TII.get(TargetOpcode::COPY)); - // Special case for COPY from the scc register bank. The scc register bank - // is modeled using 32-bit sgprs. const MachineOperand &Src = I.getOperand(1); - unsigned SrcReg = Src.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(SrcReg) && isSCC(SrcReg, MRI)) { - unsigned DstReg = I.getOperand(0).getReg(); - - // Specially handle scc->vcc copies. 
- if (isVCC(DstReg, MRI)) { - const DebugLoc &DL = I.getDebugLoc(); - BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg) - .addImm(0) - .addReg(SrcReg); - if (!MRI.getRegClassOrNull(SrcReg)) - MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI)); - I.eraseFromParent(); - return true; + MachineOperand &Dst = I.getOperand(0); + Register DstReg = Dst.getReg(); + Register SrcReg = Src.getReg(); + + if (isVCC(DstReg, MRI)) { + if (SrcReg == AMDGPU::SCC) { + const TargetRegisterClass *RC + = TRI.getConstrainedRegClassForOperand(Dst, MRI); + if (!RC) + return true; + return RBI.constrainGenericRegister(DstReg, *RC, MRI); } + + BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg) + .addImm(0) + .addReg(SrcReg); + + if (!MRI.getRegClassOrNull(SrcReg)) + MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI)); + I.eraseFromParent(); + return true; } for (const MachineOperand &MO : I.operands()) { diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir index 6722f1608e2..1e20e6c76eb 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir @@ -148,9 +148,6 @@ regBankSelected: true body: | bb.0: liveins: $sgpr0 - ; GCN-LABEL: name: copy_sgpr_no_type - ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 - ; GCN: S_ENDPGM 0, implicit [[COPY]] ; WAVE64-LABEL: name: copy_sgpr_no_type ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 ; WAVE64: S_ENDPGM 0, implicit [[COPY]] @@ -174,9 +171,6 @@ regBankSelected: true body: | bb.0: liveins: $vgpr0 - ; GCN-LABEL: name: copy_vgpr_no_type - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: S_ENDPGM 0, implicit [[COPY]] ; WAVE64-LABEL: name: copy_vgpr_no_type ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: S_ENDPGM 0, implicit [[COPY]] @@ -200,9 +194,6 @@ regBankSelected: true body: | bb.0: liveins: $sgpr0_sgpr1 - ; GCN-LABEL: name: 
copy_maybe_vcc - ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 - ; GCN: S_ENDPGM 0, implicit [[COPY]] ; WAVE64-LABEL: name: copy_maybe_vcc ; WAVE64: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 ; WAVE64: S_ENDPGM 0, implicit [[COPY]] @@ -215,3 +206,39 @@ body: | S_ENDPGM 0, implicit %1 ... + +--- + +name: copy_sgpr_s1_to_vcc +legalized: true +regBankSelected: true + + +body: | + ; WAVE64-LABEL: name: copy_sgpr_s1_to_vcc + ; WAVE64: bb.0: + ; WAVE64: successors: %bb.1(0x80000000) + ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec + ; WAVE64: $vcc = COPY [[V_CMP_NE_U32_e64_]] + ; WAVE64: S_CBRANCH_VCCNZ %bb.1, implicit $vcc + ; WAVE64: bb.1: + ; WAVE32-LABEL: name: copy_sgpr_s1_to_vcc + ; WAVE32: bb.0: + ; WAVE32: successors: %bb.1(0x80000000) + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec + ; WAVE32: $vcc_lo = COPY [[V_CMP_NE_U32_e64_]] + ; WAVE32: S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo + ; WAVE32: bb.1: + bb.0: + liveins: $sgpr0_sgpr1 + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s1) = G_TRUNC %0 + %2:vcc(s1) = COPY %1 + G_BRCOND %2, %bb.1 + + bb.1: + +... -- 2.40.0