From 220e7b197a6376839d40f616d85612ff1dd11ee3 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Jun 2019 20:35:18 +0000 Subject: [PATCH] [AMDGPU] Fix Livereg computation during epilogue insertion The LivePhysRegs calculated in order to find a scratch register in the epilogue code wrongly uses 'LiveIns'. Instead, it should use the 'LiveOuts' set. The liveness computation must also consider the operands of the terminator (return) instruction, which is the insertion point for the scratch-exec-copy instruction, so it steps backward over that instruction. Patch by Christudasan Devadasan git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364470 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/SIFrameLowering.cpp | 3 ++- test/CodeGen/AMDGPU/callee-special-input-sgprs.ll | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp index 098152b23aa..3ed6d75c045 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -678,7 +678,8 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF, if (ScratchExecCopy == AMDGPU::NoRegister) { // See emitPrologue LivePhysRegs LiveRegs(*ST.getRegisterInfo()); - LiveRegs.addLiveIns(MBB); + LiveRegs.addLiveOuts(MBB); + LiveRegs.stepBackward(*MBBI); ScratchExecCopy = findScratchNonCalleeSaveRegister(MF, LiveRegs, diff --git a/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll b/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll index 5368eb20300..ba099c33fc2 100644 --- a/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll +++ b/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll @@ -489,6 +489,7 @@ define amdgpu_kernel void @kern_indirect_use_every_sgpr_input() #1 { ; GCN-NOT: s[8:9] ; GCN-NOT: s[10:11] ; GCN-NOT: s[12:13] +; GCN: s_or_saveexec_b64 s[6:7], -1 define hidden void @func_indirect_use_every_sgpr_input() #1 { call void @use_every_sgpr_input() ret void -- 2.40.0