From fcd8db8a7db1996bfdd28d832c81baced2b45ecb Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 15 Jul 2019 19:46:48 +0000 Subject: [PATCH] AMDGPU/GlobalISel: Fix selecting vcc->vcc bank copies The extra test change is correct, although how it arrives there is a bug that needs work. With wave32, the test for isVCC ambiguously reports true for an SCC or VCC source. A new allocatable pseudo register class for SCC may be necessary. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366119 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../AMDGPU/AMDGPUInstructionSelector.cpp | 22 ++++++------ .../AMDGPU/GlobalISel/inst-select-copy.mir | 34 +++++++++++++++++-- 2 files changed, 43 insertions(+), 13 deletions(-) diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index f916154c319..f5a742b1e22 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -116,18 +116,20 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const { return RBI.constrainGenericRegister(DstReg, *RC, MRI); } - // TODO: Should probably leave the copy and let copyPhysReg expand it. - if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI)) - return false; + if (!isVCC(SrcReg, MRI)) { + // TODO: Should probably leave the copy and let copyPhysReg expand it. 
+ if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI)) + return false; - BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg) - .addImm(0) - .addReg(SrcReg); + BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg) + .addImm(0) + .addReg(SrcReg); - if (!MRI.getRegClassOrNull(SrcReg)) - MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI)); - I.eraseFromParent(); - return true; + if (!MRI.getRegClassOrNull(SrcReg)) + MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI)); + I.eraseFromParent(); + return true; + } } for (const MachineOperand &MO : I.operands()) { diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir index 548dde263ff..81ffcd5464c 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir @@ -90,10 +90,10 @@ body: | ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; WAVE32: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc + ; WAVE32: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]] + ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY4]], implicit $exec ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec - ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec - ; WAVE32: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec + ; WAVE32: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec ; WAVE32: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 
0, 0, 0, implicit $exec, implicit $flat_scr %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 @@ -268,3 +268,31 @@ body: | S_ENDPGM 0, implicit %2 ... + +--- + +name: copy_s1_vcc_to_vcc +legalized: true +regBankSelected: true + + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; WAVE64-LABEL: name: copy_s1_vcc_to_vcc + ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NE_U32_e64_]] + ; WAVE32-LABEL: name: copy_s1_vcc_to_vcc + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NE_U32_e64_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s1) = G_TRUNC %0 + %2:vcc(s1) = COPY %1 + %3:vcc(s1) = COPY %2 + S_ENDPGM 0, implicit %3 + +... -- 2.40.0