Using SplitCSR for the frame register was very broken. Often
the copies in the prolog and epilog were optimized out, and they
were also inserted after the true prolog, where the FP
had already been clobbered.
I have a hacky working solution that continues to use
split CSR, but for now this approach is simpler and gets
programs working.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313274 91177308-0d34-0410-b5e6-96231b3b80d8
const MCPhysReg *
SIRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const {
- // FIXME
- static MCPhysReg Regs[2];
-
- const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- assert(!MFI->isEntryFunction());
-
- Regs[0] = MFI->getFrameOffsetReg();
- Regs[1] = AMDGPU::NoRegister;
-
- return Regs;
+ // Stop reporting the frame-offset register as a CSR saved via copy:
+ // returning null tells PrologEpilogInserter there is nothing to copy.
+ // The FP is instead explicitly saved/restored around call sites (see the
+ // LowerCall change in this patch), since the split-CSR copies were being
+ // placed after the true prolog and were often optimized out.
+ return nullptr;
}
const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
MachineFrameInfo &MFI = MF.getFrameInfo();
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SDValue CallerSavedFP;
+
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
if (!IsSibCall) {
SDValue ScratchWaveOffsetReg
= DAG.getCopyFromReg(Chain, DL, OffsetReg, MVT::i32);
RegsToPass.emplace_back(AMDGPU::SGPR4, ScratchWaveOffsetReg);
+
+ if (!Info->isEntryFunction()) {
+ // Avoid clobbering this function's FP value. In the current convention
+ // callee will overwrite this, so do save/restore around the call site.
+ CallerSavedFP = DAG.getCopyFromReg(Chain, DL,
+ Info->getFrameOffsetReg(), MVT::i32);
+ }
}
// Stack pointer relative accesses are done by changing the offset SGPR. This
Chain = Call.getValue(0);
InFlag = Call.getValue(1);
+ if (CallerSavedFP) {
+ SDValue FPReg = DAG.getRegister(Info->getFrameOffsetReg(), MVT::i32);
+ Chain = DAG.getCopyToReg(Chain, DL, FPReg, CallerSavedFP, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
uint64_t CalleePopBytes = 0;
Chain = DAG.getCALLSEQ_END(Chain, DAG.getTargetConstant(NumBytes, DL, MVT::i32),
DAG.getTargetConstant(CalleePopBytes, DL, MVT::i32),
; GCN: v_writelane_b32 v32, s37, 4
; GCN: s_mov_b32 s33, s5
-; GCN: s_swappc_b64
+; GCN-NEXT: s_swappc_b64
+; GCN-NEXT: s_mov_b32 s5, s33
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_mov_b32 s33, s5
; GCN-NEXT: s_swappc_b64
-; GCN: s_mov_b32 s5, s33
+; GCN-NEXT: s_mov_b32 s5, s33
; GCN: v_readlane_b32 s37, v32, 4
; GCN: v_readlane_b32 s36, v32, 3
; GCN: v_readlane_b32 s35, v32, 2
ret void
}
+; FIXME: Avoid extra restore of FP in between calls.
+; GCN-LABEL: {{^}}test_func_call_external_void_funcx2:
+; GCN: s_mov_b32 s33, s5
+; GCN-NEXT: s_swappc_b64
+; GCN-NEXT: s_mov_b32 s5, s33
+; GCN-NEXT: s_mov_b32 s33, s5
+; GCN-NEXT: s_swappc_b64
+; GCN-NEXT: s_mov_b32 s5, s33
+define void @test_func_call_external_void_funcx2() #0 {
+ call void @external_void_func_void()
+ call void @external_void_func_void()
+ ret void
+}
+
; GCN-LABEL: {{^}}void_func_void_clobber_s30_s31:
; GCN: s_waitcnt
; GCN-NEXT: s_mov_b64 [[SAVEPC:s\[[0-9]+:[0-9]+\]]], s[30:31]
ret void
}
+define void @void_func_void() noinline {
+ ret void
+}
+
+; Make sure we don't get save/restore of FP between calls.
+; GCN-LABEL: {{^}}test_funcx2:
+; GCN-NOT: s5
+; GCN-NOT: s32
+define void @test_funcx2() #0 {
+ call void @void_func_void()
+ call void @void_func_void()
+ ret void
+}
+
attributes #0 = { nounwind }
attributes #1 = { nounwind noinline }
; GCN-LABEL: {{^}}i32_fastcc_i32_i32_stack_object:
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN: s_mov_b32 s5, s32
; GCN: v_add_i32_e32 v0, vcc, v1, v
+; GCN: s_mov_b32 s5, s32
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:24
; GCN: s_waitcnt vmcnt(0)
; GCN: s_setpc_b64