From 80d65f2e3aceeb5ffadb7d3ecb7bfd03101d8e52 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 7 Oct 2019 19:07:19 +0000 Subject: [PATCH] AMDGPU/GlobalISel: Use S_MOV_B64 for inline constants This hides some defects in SIFoldOperands when the immediates are split. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@373943 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../AMDGPU/AMDGPUInstructionSelector.cpp | 47 +++++++++++-------- .../GlobalISel/inst-select-constant.mir | 19 ++++---- .../GlobalISel/inst-select-load-smrd.mir | 4 +- 3 files changed, 39 insertions(+), 31 deletions(-) diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 28ebbd9101c..aa165d4ce21 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1472,31 +1472,38 @@ bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const { return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } - DebugLoc DL = I.getDebugLoc(); - const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass : - &AMDGPU::VGPR_32RegClass; - Register LoReg = MRI->createVirtualRegister(RC); - Register HiReg = MRI->createVirtualRegister(RC); - const APInt &Imm = APInt(Size, I.getOperand(1).getImm()); - - BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg) - .addImm(Imm.trunc(32).getZExtValue()); - - BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg) - .addImm(Imm.ashr(32).getZExtValue()); - - const MachineInstr *RS = - BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) - .addReg(LoReg) - .addImm(AMDGPU::sub0) - .addReg(HiReg) - .addImm(AMDGPU::sub1); + const DebugLoc &DL = I.getDebugLoc(); + + APInt Imm(Size, I.getOperand(1).getImm()); + + MachineInstr *ResInst; + if (IsSgpr && TII.isInlineConstant(Imm)) { + ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg) + .addImm(I.getOperand(1).getImm()); + } else { + const TargetRegisterClass *RC = IsSgpr ? + &AMDGPU::SReg_32_XM0RegClass : &AMDGPU::VGPR_32RegClass; + Register LoReg = MRI->createVirtualRegister(RC); + Register HiReg = MRI->createVirtualRegister(RC); + + BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg) + .addImm(Imm.trunc(32).getZExtValue()); + + BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg) + .addImm(Imm.ashr(32).getZExtValue()); + + ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) + .addReg(LoReg) + .addImm(AMDGPU::sub0) + .addReg(HiReg) + .addImm(AMDGPU::sub1); + } // We can't call constrainSelectedInstRegOperands here, because it doesn't // work for target independent opcodes I.eraseFromParent(); const TargetRegisterClass *DstRC = - TRI.getConstrainedRegClassForOperand(RS->getOperand(0), *MRI); + TRI.getConstrainedRegClassForOperand(ResInst->getOperand(0), *MRI); if (!DstRC) return true; return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI); diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir index b97f9d384aa..2acf1aeb5a7 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir @@ -5,6 +5,7 @@ name: constant legalized: true regBankSelected: true +tracksRegLiveness: true body: | @@ -25,28 +26,30 @@ body: | ; GCN: %{{[0-9]+}}:sreg_32 = S_MOV_B32 1065353216 %4:sgpr(s32) = G_FCONSTANT float 1.0 + ; GCN: %5:sreg_64_xexec = S_MOV_B64 4607182418800017408 + %5:sgpr(s64) = G_FCONSTANT double 1.0 + ; GCN: [[LO1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0 - ; GCN: [[HI1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1072693248 + ; GCN: [[HI1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1076101120 ; GCN: %{{[0-9]+}}:sreg_64_xexec = REG_SEQUENCE [[LO1]], %subreg.sub0, [[HI1]], %subreg.sub1 - %5:sgpr(s64) = G_FCONSTANT double 1.0 + %6:sgpr(s64) = G_FCONSTANT double 10.0 ; GCN: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_e32 1 - %6:vgpr(s32) = G_CONSTANT i32 1 + %7:vgpr(s32) = G_CONSTANT i32 1 ; GCN: [[LO2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0 ; GCN: [[HI2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1 ; GCN: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE [[LO2]], %subreg.sub0, [[HI2]], %subreg.sub1 - %7:vgpr(s64) = G_CONSTANT i64 4294967296 + %8:vgpr(s64) = G_CONSTANT i64 4294967296 ; GCN: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_e32 1065353216 - %8:vgpr(s32) = G_FCONSTANT float 1.0 + %9:vgpr(s32) = G_FCONSTANT float 1.0 ; GCN: [[LO3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0 ; GCN: [[HI3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1072693248 ; GCN: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE [[LO3]], %subreg.sub0, [[HI3]], %subreg.sub1 - %9:vgpr(s64) = G_FCONSTANT double 1.0 + %10:vgpr(s64) = G_FCONSTANT double 1.0 - S_ENDPGM 0, implicit %2, implicit %4, implicit %6, implicit %8, implicit %3, implicit %5, implicit %7, implicit %9 + S_ENDPGM 0, implicit %2, implicit %4, implicit %5, implicit %6, implicit %8, implicit %3, implicit %5, implicit %7, implicit %9, implicit %10 ... - diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir index 367c92b5243..30cb3f032d7 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir @@ -190,9 +190,7 @@ body: | # Test a load of an offset from a constant base address # GCN-LABEL: name: constant_address_positive{{$}} -# GCN: %4:sreg_32_xm0 = S_MOV_B32 44 -# GCN: %5:sreg_32_xm0 = S_MOV_B32 0 -# GCN: %0:sreg_64 = REG_SEQUENCE %4, %subreg.sub0, %5, %subreg.sub1 +# GCN: %0:sreg_64 = S_MOV_B64 44 # VI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0, 0 :: (dereferenceable invariant load 4, addrspace 4) # SICI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0, 0 :: (dereferenceable invariant load 4, addrspace 4) -- 2.40.0