granicus.if.org Git - llvm/commitdiff
AMDGPU: Don't enable all lanes with non-CSR VGPR spills
authorMatt Arsenault <Matthew.Arsenault@amd.com>
Tue, 28 May 2019 16:46:02 +0000 (16:46 +0000)
committerMatt Arsenault <Matthew.Arsenault@amd.com>
Tue, 28 May 2019 16:46:02 +0000 (16:46 +0000)
If the only VGPRs used for SGPR spilling were not CSRs, this was
enabling all lanes and immediately restoring exec. This is the usual
situation in leaf functions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@361848 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/AMDGPU/SIFrameLowering.cpp
test/CodeGen/AMDGPU/callee-frame-setup.ll

index 1eea77be6200eec8801b02ee2ed247157d21eb61..e333154f83bfd641b1eb9e108e00658297b71129 100644 (file)
@@ -613,30 +613,36 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
       .setMIFlag(MachineInstr::FrameSetup);
   }
 
-  if (!FuncInfo->getSGPRSpillVGPRs().empty()) {
-    if (LiveRegs.empty()) {
-      LiveRegs.init(TRI);
-      LiveRegs.addLiveIns(MBB);
-    }
+  // To avoid clobbering VGPRs in lanes that weren't active on function entry,
+  // turn on all lanes before doing the spill to memory.
+  unsigned ScratchExecCopy = AMDGPU::NoRegister;
 
-    // To avoid clobbering VGPRs in lanes that weren't active on function entry,
-    // turn on all lanes before doing the spill to memory.
-    unsigned ScratchExecCopy
-      = findScratchNonCalleeSaveRegister(MF, LiveRegs,
-                                         AMDGPU::SReg_64_XEXECRegClass);
-
-    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), ScratchExecCopy)
-      .addImm(-1);
-
-    for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
-           : FuncInfo->getSGPRSpillVGPRs()) {
-      if (!Reg.FI.hasValue())
-        continue;
-      TII->storeRegToStackSlot(MBB, MBBI, Reg.VGPR, true,
-                               Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
-                               &TII->getRegisterInfo());
+  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
+         : FuncInfo->getSGPRSpillVGPRs()) {
+    if (!Reg.FI.hasValue())
+      continue;
+
+    if (ScratchExecCopy == AMDGPU::NoRegister) {
+      if (LiveRegs.empty()) {
+        LiveRegs.init(TRI);
+        LiveRegs.addLiveIns(MBB);
+      }
+
+      ScratchExecCopy
+        = findScratchNonCalleeSaveRegister(MF, LiveRegs,
+                                           AMDGPU::SReg_64_XEXECRegClass);
+
+      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64),
+              ScratchExecCopy)
+        .addImm(-1);
     }
 
+    TII->storeRegToStackSlot(MBB, MBBI, Reg.VGPR, true,
+                             Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
+                             &TII->getRegisterInfo());
+  }
+
+  if (ScratchExecCopy != AMDGPU::NoRegister) {
     // FIXME: Split block and make terminator.
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
       .addReg(ScratchExecCopy);
@@ -654,27 +660,31 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
   DebugLoc DL;
 
-  if (!FuncInfo->getSGPRSpillVGPRs().empty()) {
-    // See emitPrologue
-    LivePhysRegs LiveRegs(*ST.getRegisterInfo());
-    LiveRegs.addLiveIns(MBB);
+  unsigned ScratchExecCopy = AMDGPU::NoRegister;
+  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
+         : FuncInfo->getSGPRSpillVGPRs()) {
+    if (!Reg.FI.hasValue())
+      continue;
 
-    unsigned ScratchExecCopy
-      = findScratchNonCalleeSaveRegister(MF, LiveRegs,
-                                         AMDGPU::SReg_64_XEXECRegClass);
-
-    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), ScratchExecCopy)
-      .addImm(-1);
-
-    for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
-           : FuncInfo->getSGPRSpillVGPRs()) {
-      if (!Reg.FI.hasValue())
-        continue;
-      TII->loadRegFromStackSlot(MBB, MBBI, Reg.VGPR,
-                                Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
-                                &TII->getRegisterInfo());
+    if (ScratchExecCopy == AMDGPU::NoRegister) {
+      // See emitPrologue
+      LivePhysRegs LiveRegs(*ST.getRegisterInfo());
+      LiveRegs.addLiveIns(MBB);
+
+      ScratchExecCopy
+        = findScratchNonCalleeSaveRegister(MF, LiveRegs,
+                                           AMDGPU::SReg_64_XEXECRegClass);
+
+      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), ScratchExecCopy)
+        .addImm(-1);
     }
 
+    TII->loadRegFromStackSlot(MBB, MBBI, Reg.VGPR,
+                              Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
+                              &TII->getRegisterInfo());
+  }
+
+  if (ScratchExecCopy != AMDGPU::NoRegister) {
     // FIXME: Split block and make terminator.
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
       .addReg(ScratchExecCopy);
index ebd6f96a5b836a26d38a308b261a31eb08126d55..bc9160772e2cbb55c2020cb9fda152078035840c 100644 (file)
@@ -135,5 +135,21 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 {
   ret void
 }
 
+; Has no spilled CSR VGPRs used for SGPR spilling, so no need to
+; enable all lanes and restore.
+
+; GCN-LABEL: {{^}}spill_only_csr_sgpr:
+; GCN: s_waitcnt
+; GCN-NEXT: v_writelane_b32 v0, s42, 0
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; clobber s42
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_readlane_b32 s42, v0, 0
+; GCN-NEXT: s_setpc_b64
+define void @spill_only_csr_sgpr() {
+  call void asm sideeffect "; clobber s42", "~{s42}"()
+  ret void
+}
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind "no-frame-pointer-elim"="true" }