From: Matt Arsenault Date: Mon, 1 Jul 2019 16:19:39 +0000 (+0000) Subject: AMDGPU/GlobalISel: RegBankSelect for readlane/readfirstlane X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8d62be14dc583e4f1e2e79d56b396261f64b8fd5;p=llvm AMDGPU/GlobalISel: RegBankSelect for readlane/readfirstlane git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364801 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 37735bab016..f6d8a1a20d7 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -142,6 +142,28 @@ AMDGPURegisterBankInfo::addMappingFromTable( return AltMappings; } +RegisterBankInfo::InstructionMappings +AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsic( + const MachineInstr &MI, const MachineRegisterInfo &MRI) const { + switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { + case Intrinsic::amdgcn_readlane: { + static const OpRegBankEntry<3> Table[2] = { + // Perfectly legal. + { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 }, + + // Need a readfirstlane for the index. + { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 } + }; + + const std::array RegSrcOpIdx = { { 0, 2, 3 } }; + return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table)); + } + + default: + return RegisterBankInfo::getInstrAlternativeMappings(MI); + } +} + RegisterBankInfo::InstructionMappings AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects( const MachineInstr &MI, const MachineRegisterInfo &MRI) const { @@ -365,6 +387,8 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings( AltMappings.push_back(&VMapping); return AltMappings; } + case AMDGPU::G_INTRINSIC: + return getInstrAlternativeMappingsIntrinsic(MI, MRI); case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: return getInstrAlternativeMappingsIntrinsicWSideEffects(MI, MRI); default: @@ -718,6 +742,28 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop( .addReg(SaveExecReg); } +// Legalize an operand that must be an SGPR by inserting a readfirstlane. +void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane( + MachineInstr &MI, MachineRegisterInfo &MRI, unsigned OpIdx) const { + unsigned Reg = MI.getOperand(OpIdx).getReg(); + const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI); + if (Bank != &AMDGPU::VGPRRegBank) + return; + + MachineIRBuilder B(MI); + unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + B.buildInstr(AMDGPU::V_READFIRSTLANE_B32) + .addDef(SGPR) + .addReg(Reg); + + const TargetRegisterClass *Constrained = + constrainGenericRegister(Reg, AMDGPU::VGPR_32RegClass, MRI); + (void)Constrained; + assert(Constrained && "Failed to constrain readfirstlane src reg"); + + MI.getOperand(OpIdx).setReg(SGPR); +} + void AMDGPURegisterBankInfo::applyMappingImpl( const OperandsMapper &OpdMapper) const { MachineInstr &MI = OpdMapper.getMI(); @@ -935,6 +981,20 @@ void AMDGPURegisterBankInfo::applyMappingImpl( executeInWaterfallLoop(MI, MRI, { 2, 3 }); return; } + case Intrinsic::amdgcn_readlane: { + SmallVector SrcReg(OpdMapper.getVRegs(2)); + + if (!SrcReg.empty()) { + assert(SrcReg.size() == 1); + MI.getOperand(2).setReg(SrcReg[0]); + } + + assert(empty(OpdMapper.getVRegs(0))); + assert(empty(OpdMapper.getVRegs(3))); + + constrainOpWithReadfirstlane(MI, MRI, 3); // Index + return; + } default: break; } @@ -1589,6 +1649,21 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[3] = AMDGPU::getValueMapping(Op2Bank, OpSize); break; } + case Intrinsic::amdgcn_readlane: { + // This must be an SGPR, but accept a VGPR. + unsigned IdxReg = MI.getOperand(3).getReg(); + unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits(); + unsigned IdxBank = getRegBankID(IdxReg, MRI, *TRI, AMDGPU::SGPRRegBankID); + OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize); + LLVM_FALLTHROUGH; + } + case Intrinsic::amdgcn_readfirstlane: { + unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + unsigned SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); + OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize); + OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize); + break; + } } break; } diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h index f05e0b75c9e..05f7b1f29f0 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h +++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h @@ -42,6 +42,9 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { MachineRegisterInfo &MRI, ArrayRef OpIndices) const; + void constrainOpWithReadfirstlane(MachineInstr &MI, MachineRegisterInfo &MRI, + unsigned OpIdx) const; + /// See RegisterBankInfo::applyMapping. void applyMappingImpl(const OperandsMapper &OpdMapper) const override; @@ -71,6 +74,10 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { const std::array RegSrcOpIdx, ArrayRef> Table) const; + RegisterBankInfo::InstructionMappings + getInstrAlternativeMappingsIntrinsic( + const MachineInstr &MI, const MachineRegisterInfo &MRI) const; + RegisterBankInfo::InstructionMappings getInstrAlternativeMappingsIntrinsicWSideEffects( const MachineInstr &MI, const MachineRegisterInfo &MRI) const; diff --git a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readfirstlane.mir b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readfirstlane.mir new file mode 100644 index 00000000000..4ca5fa1ac1c --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readfirstlane.mir @@ -0,0 +1,32 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s + +--- +name: readfirstlane_s +legalized: true + +body: | + bb.0: + liveins: $sgpr0 + ; CHECK-LABEL: name: readfirstlane_s + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0 +... + +--- +name: readfirstlane_v +legalized: true + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: readfirstlane_v + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0 +... diff --git a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readlane.mir b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readlane.mir new file mode 100644 index 00000000000..58e66d19d68 --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readlane.mir @@ -0,0 +1,71 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s + +--- +name: readlane_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; CHECK-LABEL: name: readlane_ss + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[COPY1]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1 +... + +--- +name: readlane_vs +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + ; CHECK-LABEL: name: readlane_vs + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[COPY1]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1 +... + +--- +name: readlane_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: readlane_vv + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec + ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1 +... + +--- +name: readlane_sv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + ; CHECK-LABEL: name: readlane_sv + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec + ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1 +...