From: Matt Arsenault Date: Mon, 1 Jul 2019 16:41:36 +0000 (+0000) Subject: AMDGPU/GlobalISel: RegBankSelect for amdgcn.writelane X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a8c26b261218bc0b2bf6b772e9a58300877e6609;p=llvm AMDGPU/GlobalISel: RegBankSelect for amdgcn.writelane git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364808 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index f6d8a1a20d7..c20b7976a4b 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -158,7 +158,25 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsic( const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } }; return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table)); } + case Intrinsic::amdgcn_writelane: { + static const OpRegBankEntry<4> Table[4] = { + // Perfectly legal. + { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 }, + + // Need readfirstlane of first op + { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }, + + // Need readfirstlane of second op + { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }, + // Need readfirstlane of both ops + { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 3 } + }; + + // dst, source value, index, VGPR input val + const std::array<unsigned, 4> RegSrcOpIdx = { { 0, 2, 3, 4 } }; + return addMappingFromTable<4>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table)); + } default: return RegisterBankInfo::getInstrAlternativeMappings(MI); } @@ -764,6 +782,17 @@ void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane( MI.getOperand(OpIdx).setReg(SGPR); } +// For cases where only a single copy is inserted for matching register banks. 
+// Replace the register in the instruction operand +static void substituteSimpleCopyRegs( + const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, unsigned OpIdx) { + SmallVector<unsigned, 1> SrcReg(OpdMapper.getVRegs(OpIdx)); + if (!SrcReg.empty()) { + assert(SrcReg.size() == 1); + OpdMapper.getMI().getOperand(OpIdx).setReg(SrcReg[0]); + } +} + void AMDGPURegisterBankInfo::applyMappingImpl( const OperandsMapper &OpdMapper) const { MachineInstr &MI = OpdMapper.getMI(); @@ -982,16 +1011,23 @@ void AMDGPURegisterBankInfo::applyMappingImpl( return; } case Intrinsic::amdgcn_readlane: { - SmallVector<unsigned, 1> SrcReg(OpdMapper.getVRegs(2)); + substituteSimpleCopyRegs(OpdMapper, 2); - if (!SrcReg.empty()) { - assert(SrcReg.size() == 1); - MI.getOperand(2).setReg(SrcReg[0]); - } + assert(empty(OpdMapper.getVRegs(0))); + assert(empty(OpdMapper.getVRegs(3))); + // Make sure the index is an SGPR. It doesn't make sense to run this in a + // waterfall loop, so assume it's a uniform value. + constrainOpWithReadfirstlane(MI, MRI, 3); // Index + return; + } + case Intrinsic::amdgcn_writelane: { assert(empty(OpdMapper.getVRegs(0))); + assert(empty(OpdMapper.getVRegs(2))); assert(empty(OpdMapper.getVRegs(3))); + substituteSimpleCopyRegs(OpdMapper, 4); // VGPR input val + constrainOpWithReadfirstlane(MI, MRI, 2); // Source value constrainOpWithReadfirstlane(MI, MRI, 3); // Index return; } @@ -1664,6 +1700,23 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize); break; } + case Intrinsic::amdgcn_writelane: { + unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + unsigned SrcReg = MI.getOperand(2).getReg(); + unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits(); + unsigned SrcBank = getRegBankID(SrcReg, MRI, *TRI, AMDGPU::SGPRRegBankID); + unsigned IdxReg = MI.getOperand(3).getReg(); + unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits(); + unsigned IdxBank = getRegBankID(IdxReg, 
MRI, *TRI, AMDGPU::SGPRRegBankID); + OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize); + + // These 2 must be SGPRs, but accept VGPRs. Readfirstlane will be inserted + // to legalize. + OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, SrcSize); + OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize); + OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize); + break; + } } break; } diff --git a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.writelane.mir b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.writelane.mir new file mode 100644 index 00000000000..669cfe8a92a --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.writelane.mir @@ -0,0 +1,98 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s + +--- +name: writelane_sss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2 + ; CHECK-LABEL: name: writelane_sss + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[COPY1]](s32), [[COPY3]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2 +... 
+ +--- +name: writelane_ssv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $vgpr0 + ; CHECK-LABEL: name: writelane_ssv + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $vgpr0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2 +... + +--- +name: writelane_vsv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $vgpr1 + ; CHECK-LABEL: name: writelane_vsv + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[V_READFIRSTLANE_B32_]], [[COPY1]](s32), [[COPY2]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2 +... 
+ +--- +name: writelane_vvv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-LABEL: name: writelane_vvv + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[V_READFIRSTLANE_B32_]], [[V_READFIRSTLANE_B32_1]], [[COPY2]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2 +... + +--- +name: writelane_svv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $vgpr1 + ; CHECK-LABEL: name: writelane_svv + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]], [[COPY2]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2 +...