AMDGPU: Don't add emergency stack slot if all spills are SGPR->VGPR

author Matt Arsenault <Matthew.Arsenault@amd.com>

Wed, 22 Feb 2017 22:23:32 +0000 (22:23 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Wed, 22 Feb 2017 22:23:32 +0000 (22:23 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Wed, 22 Feb 2017 22:23:32 +0000 (22:23 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Wed, 22 Feb 2017 22:23:32 +0000 (22:23 +0000)
diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp

index 80ba15a64a554ab3799627221b842a9ce5409d02..68bac8af3e4ca36ebbe15b9bf7435ebd41e0cc74 100644 (file)
--- a/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -383,6 +383,16 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
  
  }
  
+static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
+  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
+       I != E; ++I) {
+    if (!MFI.isDeadObjectIndex(I))
+      return false;
+  }
+
+  return true;
+}
+
  void SIFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF,
    RegScavenger *RS) const {
@@ -391,8 +401,51 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
    if (!MFI.hasStackObjects())
      return;
  
-  bool MayNeedScavengingEmergencySlot = MFI.hasStackObjects();
-  if (MayNeedScavengingEmergencySlot) {
+  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  const SIRegisterInfo &TRI = TII->getRegisterInfo();
+  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+  bool AllSGPRSpilledToVGPRs = false;
+
+  if (TRI.spillSGPRToVGPR() && FuncInfo->hasSpilledSGPRs()) {
+    AllSGPRSpilledToVGPRs = true;
+
+    // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
+    // are spilled to VGPRs, in which case we can eliminate the stack usage.
+    //
+    // XXX - This operates under the assumption that only other SGPR spills are
+    // users of the frame index. I'm not 100% sure this is correct. The
+    // StackColoring pass has a comment saying a future improvement would be to
+    // merging of allocas with spill slots, but for now according to
+    // MachineFrameInfo isSpillSlot can't alias any other object.
+    for (MachineBasicBlock &MBB : MF) {
+      MachineBasicBlock::iterator Next;
+      for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
+        MachineInstr &MI = *I;
+        Next = std::next(I);
+
+        if (TII->isSGPRSpill(MI)) {
+          int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
+          if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
+            bool Spilled = TRI.eliminateSGPRToVGPRSpillFrameIndex(MI, FI, RS);
+            (void)Spilled;
+            assert(Spilled && "failed to spill SGPR to VGPR when allocated");
+          } else
+            AllSGPRSpilledToVGPRs = false;
+        }
+      }
+    }
+
+    FuncInfo->removeSGPRToVGPRFrameIndices(MFI);
+  }
+
+  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
+  // but currently hasNonSpillStackObjects is set only from source
+  // allocas. Stack temps produced from legalization are not counted currently.
+  if (FuncInfo->hasNonSpillStackObjects() || FuncInfo->hasSpilledVGPRs() ||
+      !AllSGPRSpilledToVGPRs || !allStackObjectsAreDead(MFI)) {
+    assert(RS && "RegScavenger required if spilling");
+
      // We force this to be at offset 0 so no user object ever has 0 as an
      // address, so we may use 0 as an invalid pointer value. This is because
      // LLVM assumes 0 is an invalid pointer in address space 0. Because alloca
@@ -410,40 +463,6 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
        AMDGPU::SGPR_32RegClass.getSize(), 0, false);
      RS->addScavengingFrameIndex(ScavengeFI);
    }
-
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
-  const SIInstrInfo *TII = ST.getInstrInfo();
-  const SIRegisterInfo &TRI = TII->getRegisterInfo();
-  if (!TRI.spillSGPRToVGPR())
-    return;
-
-  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
-  if (!FuncInfo->hasSpilledSGPRs())
-    return;
-
-  // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
-  // are spilled to VGPRs, in which case we can eliminate the stack usage.
-  //
-  // XXX - This operates under the assumption that only other SGPR spills are
-  // users of the frame index. I'm not 100% sure this is correct. The
-  // StackColoring pass has a comment saying a future improvement would be to
-  // merging of allocas with spill slots, but for now according to
-  // MachineFrameInfo isSpillSlot can't alias any other object.
-  for (MachineBasicBlock &MBB : MF) {
-    MachineBasicBlock::iterator Next;
-    for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
-      MachineInstr &MI = *I;
-      Next = std::next(I);
-
-      if (TII->isSGPRSpill(MI)) {
-        int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
-        if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI))
-          TRI.eliminateSGPRToVGPRSpillFrameIndex(MI, FI, RS);
-      }
-    }
-  }
-
-  FuncInfo->removeSGPRToVGPRFrameIndices(MFI);
  }
  
  void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF,
diff --git a/test/CodeGen/AMDGPU/si-sgpr-spill.ll b/test/CodeGen/AMDGPU/si-sgpr-spill.ll

index bccddcd96876b6b91ab8ce1f30fbd7a4f57f33ac..bcaaba7240337156d8ef4c559ddf486752b5bcb9 100644 (file)
--- a/test/CodeGen/AMDGPU/si-sgpr-spill.ll
+++ b/test/CodeGen/AMDGPU/si-sgpr-spill.ll
@@ -18,13 +18,12 @@
  ; GCN: s_mov_b32 m0
  
  ; Make sure scratch space isn't being used for SGPR->VGPR spills
-; FIXME: Seem to be leaving behind unused emergency slot.
  
  ; Writing to M0 from an SMRD instruction will hang the GPU.
  ; GCN-NOT: s_buffer_load_dword m0
  ; GCN: s_endpgm
  
-; TOVGPR: ScratchSize: 4{{$}}
+; TOVGPR: ScratchSize: 0{{$}}
  define amdgpu_ps void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) {
  main_body:
    %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
@@ -768,7 +767,7 @@ ENDIF66:                                          ; preds = %LOOP65
  
  ; GCN-LABEL: {{^}}main1:
  ; GCN: s_endpgm
-; TOVGPR: ScratchSize: 4{{$}}
+; TOVGPR: ScratchSize: 0{{$}}
  define amdgpu_ps void @main1([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) {
  main_body:
    %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Wed, 22 Feb 2017 22:23:32 +0000 (22:23 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Wed, 22 Feb 2017 22:23:32 +0000 (22:23 +0000)
lib/Target/AMDGPU/SIFrameLowering.cpp		patch \| blob \| history
test/CodeGen/AMDGPU/si-sgpr-spill.ll		patch \| blob \| history