From: Matt Arsenault Date: Tue, 29 Nov 2016 19:20:48 +0000 (+0000) Subject: AMDGPU: Materialize frame index before add X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8d2aadbfac9721049cb96faf1e06a64604572371;p=llvm AMDGPU: Materialize frame index before add It isn't generally safe to fold the frame index directly into the operand since it will possibly not be an inline immediate after it is expanded. This surprisingly seems to produce better code, since the FI doesn't prevent folding other immediate operands. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@288185 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index 1d933da47c2..936bb8b656f 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -245,12 +245,17 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg) .addImm(Offset); + BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg) + .addFrameIndex(FrameIdx); + BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_ADD_I32_e64), BaseReg) .addReg(UnusedCarry, RegState::Define | RegState::Dead) .addReg(OffsetReg, RegState::Kill) - .addFrameIndex(FrameIdx); + .addReg(FIReg); } void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, diff --git a/test/CodeGen/AMDGPU/local-stack-slot-bug.ll b/test/CodeGen/AMDGPU/local-stack-slot-bug.ll index dc01f76108e..92c592b5e26 100644 --- a/test/CodeGen/AMDGPU/local-stack-slot-bug.ll +++ b/test/CodeGen/AMDGPU/local-stack-slot-bug.ll @@ -7,15 +7,16 @@ ; ; CHECK-LABEL: {{^}}main: +; CHECK: v_mov_b32_e32 [[BASE_FI:v[0-9]+]], 0{{$}} + ; FIXME: add 0? -; CHECK-DAG: s_movk_i32 [[K0:s[0-9]+]], 0x140 -; CHECK-DAG: v_add_i32_e64 [[ADD_K0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, [[K0]], 0 +; CHECK-DAG: v_add_i32_e32 [[ADD_K0:v[0-9]+]], vcc, 0x140, [[BASE_FI]] ; CHECK-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0 ; CHECK-DAG: buffer_store_dword {{v[0-9]+}}, [[ADD_K0]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}} ; CHECK-DAG: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, 0x200, [[BYTES]] -; CHECK-DAG: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, 0, [[BYTES]] +; CHECK-DAG: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, [[BASE_FI]], [[BYTES]] ; CHECK: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen ; CHECK: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen