}
+static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
+ for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
+ I != E; ++I) {
+ if (!MFI.isDeadObjectIndex(I))
+ return false;
+ }
+
+ return true;
+}
+
void SIFrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF,
RegScavenger *RS) const {
if (!MFI.hasStackObjects())
return;
- bool MayNeedScavengingEmergencySlot = MFI.hasStackObjects();
- if (MayNeedScavengingEmergencySlot) {
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+ bool AllSGPRSpilledToVGPRs = false;
+
+ if (TRI.spillSGPRToVGPR() && FuncInfo->hasSpilledSGPRs()) {
+ AllSGPRSpilledToVGPRs = true;
+
+ // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
+ // are spilled to VGPRs, in which case we can eliminate the stack usage.
+ //
+ // XXX - This operates under the assumption that only other SGPR spills are
+ // users of the frame index. I'm not 100% sure this is correct. The
+ // StackColoring pass has a comment saying a future improvement would be to
+ // merging of allocas with spill slots, but for now according to
+ // MachineFrameInfo isSpillSlot can't alias any other object.
+ for (MachineBasicBlock &MBB : MF) {
+ MachineBasicBlock::iterator Next;
+ for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
+ MachineInstr &MI = *I;
+ Next = std::next(I);
+
+ if (TII->isSGPRSpill(MI)) {
+ int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
+ if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
+ bool Spilled = TRI.eliminateSGPRToVGPRSpillFrameIndex(MI, FI, RS);
+ (void)Spilled;
+ assert(Spilled && "failed to spill SGPR to VGPR when allocated");
+ } else
+ AllSGPRSpilledToVGPRs = false;
+ }
+ }
+ }
+
+ FuncInfo->removeSGPRToVGPRFrameIndices(MFI);
+ }
+
+ // FIXME: The other checks should be redundant with allStackObjectsAreDead,
+ // but currently hasNonSpillStackObjects is set only from source
+ // allocas. Stack temps produced from legalization are not counted currently.
+ if (FuncInfo->hasNonSpillStackObjects() || FuncInfo->hasSpilledVGPRs() ||
+ !AllSGPRSpilledToVGPRs || !allStackObjectsAreDead(MFI)) {
+ assert(RS && "RegScavenger required if spilling");
+
// We force this to be at offset 0 so no user object ever has 0 as an
// address, so we may use 0 as an invalid pointer value. This is because
// LLVM assumes 0 is an invalid pointer in address space 0. Because alloca
AMDGPU::SGPR_32RegClass.getSize(), 0, false);
RS->addScavengingFrameIndex(ScavengeFI);
}
-
- const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
- const SIInstrInfo *TII = ST.getInstrInfo();
- const SIRegisterInfo &TRI = TII->getRegisterInfo();
- if (!TRI.spillSGPRToVGPR())
- return;
-
- SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
- if (!FuncInfo->hasSpilledSGPRs())
- return;
-
- // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
- // are spilled to VGPRs, in which case we can eliminate the stack usage.
- //
- // XXX - This operates under the assumption that only other SGPR spills are
- // users of the frame index. I'm not 100% sure this is correct. The
- // StackColoring pass has a comment saying a future improvement would be to
- // merging of allocas with spill slots, but for now according to
- // MachineFrameInfo isSpillSlot can't alias any other object.
- for (MachineBasicBlock &MBB : MF) {
- MachineBasicBlock::iterator Next;
- for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
- MachineInstr &MI = *I;
- Next = std::next(I);
-
- if (TII->isSGPRSpill(MI)) {
- int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
- if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI))
- TRI.eliminateSGPRToVGPRSpillFrameIndex(MI, FI, RS);
- }
- }
- }
-
- FuncInfo->removeSGPRToVGPRFrameIndices(MFI);
}
void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF,
; GCN: s_mov_b32 m0
; Make sure scratch space isn't being used for SGPR->VGPR spills
-; FIXME: Seem to be leaving behind unused emergency slot.
; Writing to M0 from an SMRD instruction will hang the GPU.
; GCN-NOT: s_buffer_load_dword m0
; GCN: s_endpgm
-; TOVGPR: ScratchSize: 4{{$}}
+; TOVGPR: ScratchSize: 0{{$}}
define amdgpu_ps void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) {
main_body:
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
; GCN-LABEL: {{^}}main1:
; GCN: s_endpgm
-; TOVGPR: ScratchSize: 4{{$}}
+; TOVGPR: ScratchSize: 0{{$}}
define amdgpu_ps void @main1([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) {
main_body:
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0