From: Matt Arsenault Date: Wed, 9 Oct 2019 22:44:49 +0000 (+0000) Subject: AMDGPU/GlobalISel: Fix crash on wide constant load with VGPR pointer X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4f540a9a67e18a0be831d97b64181c37cd1caabb;p=llvm AMDGPU/GlobalISel: Fix crash on wide constant load with VGPR pointer This was ignoring the register bank of the input pointer, and isUniformMMO seems overly aggressive. This will now conservatively assume a VGPR in cases where the incoming bank hasn't been determined yet (i.e. is from a loop phi). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@374255 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 9446814c8f8..aded210bd84 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -323,6 +323,8 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects( } } +// FIXME: Returns uniform if there's no source value information. This is +// probably wrong. static bool isInstrUniformNonExtLoadAlign4(const MachineInstr &MI) { if (!MI.hasOneMemOperand()) return false; @@ -1047,8 +1049,13 @@ bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI, SmallVector<Register, 1> SrcRegs(OpdMapper.getVRegs(1)); // If the pointer is an SGPR, we have nothing to do. 
- if (SrcRegs.empty()) - return false; + if (SrcRegs.empty()) { + Register PtrReg = MI.getOperand(1).getReg(); + const RegisterBank *PtrBank = getRegBank(PtrReg, MRI, *TRI); + if (PtrBank == &AMDGPU::SGPRRegBank) + return false; + SrcRegs.push_back(PtrReg); + } assert(LoadSize % MaxNonSmrdLoadSize == 0); @@ -2025,7 +2032,7 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const { const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); - SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands()); + SmallVector<const ValueMapping*, 2> OpdsMapping(2); unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); LLT LoadTy = MRI.getType(MI.getOperand(0).getReg()); Register PtrReg = MI.getOperand(1).getReg(); @@ -2036,7 +2043,10 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const { const ValueMapping *ValMapping; const ValueMapping *PtrMapping; - if ((AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS && + const RegisterBank *PtrBank = getRegBank(PtrReg, MRI, *TRI); + + if (PtrBank == &AMDGPU::SGPRRegBank && + (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS && AS != AMDGPUAS::PRIVATE_ADDRESS) && isInstrUniformNonExtLoadAlign4(MI)) { // We have a uniform instruction so we want to use an SMRD load diff --git a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir index 49ac13f6666..d8bd4f777b4 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir @@ -69,6 +69,8 @@ define amdgpu_kernel void @load_constant_i32_uniform_align2() {ret void} define amdgpu_kernel void @load_constant_i32_uniform_align1() {ret void} define amdgpu_kernel void @load_private_uniform_sgpr_i32() {ret void} + define amdgpu_kernel void @load_constant_v8i32_vgpr_crash() { ret void } + define amdgpu_kernel void @load_constant_v8i32_vgpr_crash_loop_phi() { ret void } declare i32 
@llvm.amdgcn.workitem.id.x() #0 attributes #0 = { nounwind readnone } @@ -652,3 +654,47 @@ body: | %0:_(p5) = COPY $sgpr0 %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 5, align 4) ... + +--- +name: load_constant_v8i32_vgpr_crash +legalized: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: load_constant_v8i32_vgpr_crash + ; CHECK: %0:vgpr(p4) = COPY $vgpr0_vgpr1 + ; CHECK: vgpr(<4 x s32>) = G_LOAD %0(p4) + ; CHECK: vgpr(<4 x s32>) = G_LOAD + ; CHECK: G_CONCAT_VECTORS + %0:_(p4) = COPY $vgpr0_vgpr1 + %1:_(<8 x s32>) = G_LOAD %0 :: (load 32, addrspace 4) +... + +--- +name: load_constant_v8i32_vgpr_crash_loop_phi +legalized: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + + ; CHECK-LABEL: name: load_constant_v8i32_vgpr_crash_loop_phi + ; CHECK: G_PHI + ; CHECK: vgpr(<4 x s32>) = G_LOAD + ; CHECK: vgpr(<4 x s32>) = G_LOAD + ; CHECK: G_CONCAT_VECTORS + + %0:_(p4) = COPY $sgpr0_sgpr1 + %1:_(p4) = COPY $sgpr2_sgpr3 + G_BR %bb.1 + + bb.1: + %2:_(p4) = G_PHI %0, %bb.0, %4, %bb.1 + %3:_(<8 x s32>) = G_LOAD %2 :: (load 32, addrspace 4) + %4:_(p4) = COPY %1 + G_BR %bb.1 +...