From: Matt Arsenault
Date: Wed, 27 Mar 2019 17:14:32 +0000 (+0000)
Subject: AMDGPU: Enable the scavenger for large frames
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0755a8d19c51909534cca7b02e1db02421998049;p=llvm

AMDGPU: Enable the scavenger for large frames

Another test is needed for the case where the scavenging fails, but
there's another issue with that which needs an additional fix.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@357093 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 8e49b647aa7..92fbbcdc93d 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -257,11 +257,20 @@ bool SIRegisterInfo::requiresFrameIndexScavenging(
 
 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
   const MachineFunction &MF) const {
-  // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
-  // create a virtual register for it during frame index elimination, so the
-  // scavenger is directly needed.
-  return MF.getFrameInfo().hasStackObjects() &&
-         MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  if (!MFI.hasStackObjects())
+    return false;
+
+  // The scavenger is used for large frames which may require finding a free
+  // register for large offsets.
+  if (!isUInt<12>(MFI.getStackSize()))
+    return true;
+
+  // If using scalar stores, for spills, m0 is needed for the scalar store
+  // offset (pre-GFX9). m0 is unallocatable, so we can't create a virtual
+  // register for it during frame index elimination, so the scavenger is
+  // directly needed.
+  return MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
          MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
 }
 
diff --git a/test/CodeGen/AMDGPU/spill-offset-calculation.ll b/test/CodeGen/AMDGPU/spill-offset-calculation.ll
index 3c179b580fc..14cbf5518e1 100644
--- a/test/CodeGen/AMDGPU/spill-offset-calculation.ll
+++ b/test/CodeGen/AMDGPU/spill-offset-calculation.ll
@@ -35,9 +35,8 @@ entry:
 
   %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
   ; 0x40000 / 64 = 4096 (for wave64)
-  ; CHECK: s_add_u32 s7, s7, 0x40000
-  ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s7 ; 4-byte Folded Spill
-  ; CHECK: s_sub_u32 s7, s7, 0x40000
+  ; CHECK: s_add_u32 s6, s7, 0x40000
+  ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 ; 4-byte Folded Spill
   %a = load volatile i32, i32 addrspace(5)* %aptr
 
   ; Force %a to spill
@@ -88,10 +87,9 @@ entry:
   %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
 
   ; 0x3ff00 / 64 = 4092 (for wave64)
-  ; CHECK: s_add_u32 s7, s7, 0x3ff00
-  ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s7 ; 4-byte Folded Spill
-  ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s7 offset:4 ; 4-byte Folded Spill
-  ; CHECK: s_sub_u32 s7, s7, 0x3ff00
+  ; CHECK: s_add_u32 s6, s7, 0x3ff00
+  ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 ; 4-byte Folded Spill
+  ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 offset:4 ; 4-byte Folded Spill
   %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
   %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
 
@@ -139,9 +137,8 @@ entry:
 
   %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
   ; 0x40000 / 64 = 4096 (for wave64)
-  ; CHECK: s_add_u32 s5, s5, 0x40000
-  ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s5 ; 4-byte Folded Spill
-  ; CHECK: s_sub_u32 s5, s5, 0x40000
+  ; CHECK: s_add_u32 s6, s5, 0x40000
+  ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 ; 4-byte Folded Spill
   %a = load volatile i32, i32 addrspace(5)* %aptr
 
   ; Force %a to spill
@@ -192,10 +189,9 @@ entry:
   %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
 
   ; 0x3ff00 / 64 = 4092 (for wave64)
-  ; CHECK: s_add_u32 s5, s5, 0x3ff00
-  ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s5 ; 4-byte Folded Spill
-  ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s5 offset:4 ; 4-byte Folded Spill
-  ; CHECK: s_sub_u32 s5, s5, 0x3ff00
+  ; CHECK: s_add_u32 s6, s5, 0x3ff00
+  ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 ; 4-byte Folded Spill
+  ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 offset:4 ; 4-byte Folded Spill
   %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
   %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
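
As a reading aid, below is a minimal standalone C++ sketch of the gating
logic the SIRegisterInfo.cpp hunk introduces. isUInt12 mirrors
llvm::isUInt<12>; the needsScavenger helper and its bool parameters are
hypothetical stand-ins for the MachineFrameInfo / GCNSubtarget /
SIMachineFunctionInfo queries in the real code, and the 4096-byte boundary
presumably corresponds to the 12-bit unsigned immediate offset field of
scratch buffer accesses.

// Standalone sketch (not LLVM API) of the new scavenger gating predicate.
#include <cstdint>
#include <iostream>

// Mirrors llvm::isUInt<12>: true iff X fits in a 12-bit unsigned immediate.
static bool isUInt12(uint64_t X) { return X < (1u << 12); }

static bool needsScavenger(uint64_t StackSize, bool HasStackObjects,
                           bool HasScalarStores, bool HasSpilledSGPRs) {
  if (!HasStackObjects)
    return false;
  // Large frames may need a free register to materialize offsets that do
  // not fit in the immediate field, so always request the scavenger.
  if (!isUInt12(StackSize))
    return true;
  // Pre-GFX9 scalar spill stores take their offset in m0, which is
  // unallocatable, so SGPR spills with scalar stores also need it.
  return HasScalarStores && HasSpilledSGPRs;
}

int main() {
  // The tests' per-wave offset 0x40000 is 0x40000 / 64 = 4096 bytes per
  // lane under wave64 swizzling, one past the largest 12-bit value (4095).
  uint64_t PerLane = 0x40000 / 64;
  std::cout << needsScavenger(PerLane, true, false, false) << '\n'; // 1
  std::cout << needsScavenger(4095, true, false, false) << '\n';    // 0
}

Note the design choice this reflects: gating on the frame size alone means
the scavenger is requested whenever any frame offset might overflow the
immediate field, independent of whether scalar stores are in use.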