From: Michael Liao
Date: Thu, 4 Jul 2019 13:29:45 +0000 (+0000)
Subject: [AMDGPU] Correct the setting of `FlatScratchInit`.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=463be50e14278f6218eeaf8e47c19203b5d1805e;p=llvm

[AMDGPU] Correct the setting of `FlatScratchInit`.

Summary:
- The setting of that flag should skip stack slots used only for spilling.

Reviewers: arsenm, rampitec

Subscribers: qcolombet, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64143

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@365137 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index f7d61725110..b73feadd521 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -145,9 +145,20 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
     KernargSegmentPtr = true;
 
   if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
+    auto hasNonSpillStackObjects = [&]() {
+      // Avoid expensive checking if there are no stack objects.
+      if (!HasStackObjects)
+        return false;
+      for (auto OI = FrameInfo.getObjectIndexBegin(),
+                OE = FrameInfo.getObjectIndexEnd(); OI != OE; ++OI)
+        if (!FrameInfo.isSpillSlotObjectIndex(OI))
+          return true;
+      // All stack objects are spill slots.
+      return false;
+    };
     // TODO: This could be refined a lot. The attribute is a poor way of
     // detecting calls that may require it before argument lowering.
-    if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
+    if (hasNonSpillStackObjects() || F.hasFnAttribute("amdgpu-flat-scratch"))
       FlatScratchInit = true;
   }
 
diff --git a/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir b/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
index 3c1e565ea56..e366a0ebb0e 100644
--- a/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
+++ b/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
@@ -41,7 +41,7 @@ constants:
 body:             |
   bb.0:
     successors: %bb.1, %bb.2
-    liveins: $vgpr0, $sgpr4_sgpr5
+    liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr13
 
     $vgpr1_vgpr2 = COPY killed $sgpr4_sgpr5, implicit $exec
     $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`)
@@ -66,6 +66,7 @@ body:             |
 
   bb.2:
     successors:
+    liveins: $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr13
 
     $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (load 8 from %stack.0, align 4, addrspace 5)
     $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
@@ -105,7 +106,7 @@ constants:
 body:             |
   bb.0:
     successors: %bb.1, %bb.2
-    liveins: $vgpr0, $sgpr4_sgpr5
+    liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr13
 
    $vgpr1_vgpr2 = COPY killed $sgpr4_sgpr5, implicit $exec
     $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`)
@@ -130,6 +131,7 @@ body:             |
 
   bb.2:
     successors:
+    liveins: $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr13
 
     $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (load 8 from %stack.0, align 4, addrspace 5)
     $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
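
The behavior change can be modeled in isolation: FlatScratchInit is now requested only when some stack object is not a spill slot, so frames containing nothing but spills no longer trigger it. Below is a minimal standalone C++ sketch of that predicate. It is illustrative only, not the in-tree API: FrameObject and the sample frames are hypothetical stand-ins for MachineFrameInfo's stack-object list and isSpillSlotObjectIndex(); only the control flow mirrors the committed lambda.

    // Standalone model of the hasNonSpillStackObjects() predicate above.
    // FrameObject is a hypothetical stand-in for MachineFrameInfo's
    // per-object state; the real code iterates getObjectIndexBegin()..
    // getObjectIndexEnd() and queries isSpillSlotObjectIndex().
    #include <iostream>
    #include <vector>

    struct FrameObject {
      bool IsSpillSlot; // True if this slot exists only to hold a spill.
    };

    // Returns true only if at least one stack object is not a spill slot.
    static bool hasNonSpillStackObjects(const std::vector<FrameObject> &Frame) {
      // Cheap early exit when there are no stack objects at all.
      if (Frame.empty())
        return false;
      for (const FrameObject &FO : Frame)
        if (!FO.IsSpillSlot)
          return true;
      // All stack objects are spill slots.
      return false;
    }

    int main() {
      std::vector<FrameObject> SpillOnly = {{true}, {true}};
      std::vector<FrameObject> Mixed = {{true}, {false}};
      // A spill-only frame no longer forces FlatScratchInit...
      std::cout << hasNonSpillStackObjects(SpillOnly) << '\n'; // prints 0
      // ...but a frame with any other stack object still does.
      std::cout << hasNonSpillStackObjects(Mixed) << '\n';     // prints 1
      return 0;
    }

This is also why the .mir test gains the extra liveins: with spill-only frames no longer setting FlatScratchInit, the registers feeding the spill restore ($sgpr8_sgpr9_sgpr10_sgpr11, $sgpr13) must be kept live explicitly along the path to bb.2.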