From: Matt Arsenault Date: Tue, 28 Jun 2016 01:09:00 +0000 (+0000) Subject: AMDGPU: Fix out of bounds indirect indexing errors X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=1bf162a64a244eca4a75d7079e51195b2169d4b0;p=llvm AMDGPU: Fix out of bounds indirect indexing errors This was producing accesses to registers beyond the super register's limits, resulting in verifier failures. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@273977 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp index 2262ce2f35b..861aa83888d 100644 --- a/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -102,7 +102,8 @@ private: int Offset); bool loadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset = 0); - void computeIndirectRegAndOffset(unsigned VecReg, unsigned &Reg, int &Offset); + std::pair<unsigned, int> computeIndirectRegAndOffset(unsigned VecReg, + int Offset) const; bool indirectSrc(MachineInstr &MI); bool indirectDst(MachineInstr &MI); @@ -538,16 +539,25 @@ bool SILowerControlFlow::loadM0(MachineInstr &MI, MachineInstr *MovRel, int Offs // indirect Index. e.g. v0 = v[VecReg + Offset] // As an output, this is a constant value that needs // to be added to the value stored in M0. -void SILowerControlFlow::computeIndirectRegAndOffset(unsigned VecReg, - unsigned &Reg, - int &Offset) { +std::pair<unsigned, int> +SILowerControlFlow::computeIndirectRegAndOffset(unsigned VecReg, + int Offset) const { unsigned SubReg = TRI->getSubReg(VecReg, AMDGPU::sub0); if (!SubReg) SubReg = VecReg; + const TargetRegisterClass *SuperRC = TRI->getPhysRegClass(VecReg); const TargetRegisterClass *RC = TRI->getPhysRegClass(SubReg); - int RegIdx = TRI->getHWRegIndex(SubReg) + Offset; + int NumElts = SuperRC->getSize() / RC->getSize(); + int BaseRegIdx = TRI->getHWRegIndex(SubReg); + + // Skip out of bounds offsets, or else we would end up using an undefined + // register. 
+ if (Offset >= NumElts) + return std::make_pair(RC->getRegister(BaseRegIdx), Offset); + + int RegIdx = BaseRegIdx + Offset; if (RegIdx < 0) { Offset = RegIdx; RegIdx = 0; @@ -555,7 +565,8 @@ void SILowerControlFlow::computeIndirectRegAndOffset(unsigned VecReg, Offset = 0; } - Reg = RC->getRegister(RegIdx); + unsigned Reg = RC->getRegister(RegIdx); + return std::make_pair(Reg, Offset); } // Return true if a new block was inserted. @@ -568,7 +579,7 @@ bool SILowerControlFlow::indirectSrc(MachineInstr &MI) { int Off = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm(); unsigned Reg; - computeIndirectRegAndOffset(SrcVec->getReg(), Reg, Off); + std::tie(Reg, Off) = computeIndirectRegAndOffset(SrcVec->getReg(), Off); MachineInstr *MovRel = BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst) @@ -588,7 +599,7 @@ bool SILowerControlFlow::indirectDst(MachineInstr &MI) { MachineOperand *Val = TII->getNamedOperand(MI, AMDGPU::OpName::val); unsigned Reg; - computeIndirectRegAndOffset(Dst, Reg, Off); + std::tie(Reg, Off) = computeIndirectRegAndOffset(Dst, Off); MachineInstr *MovRel = BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32)) diff --git a/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/test/CodeGen/AMDGPU/indirect-addressing-si.ll index 56e2f38a49b..9f57dd2f321 100644 --- a/test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ b/test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -421,6 +421,37 @@ bb: ret void } +; Offset puts the access outside of the super register boundaries, so clamp to 1st element. 
+; CHECK-LABEL: {{^}}extract_largest_inbounds_offset: +; CHECK: buffer_load_dwordx4 v{{\[}}[[LO_ELT:[0-9]+]]:[[HI_ELT:[0-9]+]]{{\]}} +; CHECK: s_load_dword [[IDX:s[0-9]+]] +; CHECK: s_mov_b32 m0, [[IDX]] +; CHECK-NEXT: v_movrels_b32_e32 [[EXTRACT:v[0-9]+]], v[[HI_ELT]] +; CHECK: buffer_store_dword [[EXTRACT]] +define void @extract_largest_inbounds_offset(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %idx) { +entry: + %ld = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in + %offset = add i32 %idx, 3 + %value = extractelement <4 x i32> %ld, i32 %offset + store i32 %value, i32 addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}extract_out_of_bounds_offset: +; CHECK: buffer_load_dwordx4 v{{\[}}[[LO_ELT:[0-9]+]]:[[HI_ELT:[0-9]+]]{{\]}} +; CHECK: s_load_dword [[IDX:s[0-9]+]] +; CHECK: s_add_i32 m0, [[IDX]], 4 +; CHECK-NEXT: v_movrels_b32_e32 [[EXTRACT:v[0-9]+]], v[[LO_ELT]] +; CHECK: buffer_store_dword [[EXTRACT]] +define void @extract_out_of_bounds_offset(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %idx) { +entry: + %ld = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in + %offset = add i32 %idx, 4 + %value = extractelement <4 x i32> %ld, i32 %offset + store i32 %value, i32 addrspace(1)* %out + ret void +} + declare i32 @llvm.amdgcn.workitem.id.x() #1 attributes #0 = { nounwind }