From: Matt Arsenault Date: Mon, 26 Jun 2017 17:53:59 +0000 (+0000) Subject: AMDGPU: Setup SP/FP in callee function prolog/epilog X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8e828b87b2da64499a5b1d2dc22c3dd0071e69bc;p=llvm AMDGPU: Setup SP/FP in callee function prolog/epilog git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306312 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp index 479a321d305..08a64de3850 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -417,14 +417,69 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, void SIFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { - const SIMachineFunctionInfo *MFI = MF.getInfo(); - if (MFI->isEntryFunction()) + const SIMachineFunctionInfo *FuncInfo = MF.getInfo(); + if (FuncInfo->isEntryFunction()) { emitEntryFunctionPrologue(MF, MBB); + return; + } + + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const SISubtarget &ST = MF.getSubtarget(); + const SIInstrInfo *TII = ST.getInstrInfo(); + + unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg(); + unsigned FramePtrReg = FuncInfo->getFrameOffsetReg(); + + MachineBasicBlock::iterator MBBI = MBB.begin(); + DebugLoc DL; + + bool NeedFP = hasFP(MF); + if (NeedFP) { + // If we need a base pointer, set it up here. It's whatever the value of + // the stack pointer is at this point. Any variable size objects will be + // allocated after this, so we can still use the base pointer to reference + // locals. + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg) + .addReg(StackPtrReg) + .setMIFlag(MachineInstr::FrameSetup); + } + + uint32_t NumBytes = MFI.getStackSize(); + if (NumBytes != 0 && hasSP(MF)) { + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg) + .addReg(StackPtrReg) + .addImm(NumBytes * ST.getWavefrontSize()) + .setMIFlag(MachineInstr::FrameSetup); + } } void SIFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { + const SIMachineFunctionInfo *FuncInfo = MF.getInfo(); + if (FuncInfo->isEntryFunction()) + return; + unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg(); + if (StackPtrReg == AMDGPU::NoRegister) + return; + + const MachineFrameInfo &MFI = MF.getFrameInfo(); + uint32_t NumBytes = MFI.getStackSize(); + + const SISubtarget &ST = MF.getSubtarget(); + const SIInstrInfo *TII = ST.getInstrInfo(); + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); + DebugLoc DL; + + // FIXME: Clarify distinction between no set SP and SP. For callee functions, + // it's really whether we need SP to be accurate or not. + + if (NumBytes != 0 && hasSP(MF)) { + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg) + .addReg(StackPtrReg) + .addImm(NumBytes * ST.getWavefrontSize()) + .setMIFlag(MachineInstr::FrameDestroy); + } } static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) { @@ -557,3 +612,19 @@ void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF, WorkItemIDObjectIdx, &AMDGPU::VGPR_32RegClass, TRI); } } + +bool SIFrameLowering::hasFP(const MachineFunction &MF) const { + // All stack operations are relative to the frame offset SGPR. + // TODO: Still want to eliminate sometimes. + const MachineFrameInfo &MFI = MF.getFrameInfo(); + + // XXX - Is this only called after frame is finalized? Should be able to check + // frame size. + return MFI.hasStackObjects() && !allStackObjectsAreDead(MFI); +} + +bool SIFrameLowering::hasSP(const MachineFunction &MF) const { + // All stack operations are relative to the frame offset SGPR. + const MachineFrameInfo &MFI = MF.getFrameInfo(); + return MFI.hasCalls() || MFI.hasVarSizedObjects(); +} diff --git a/lib/Target/AMDGPU/SIFrameLowering.h b/lib/Target/AMDGPU/SIFrameLowering.h index e17adbe2736..d4dfa1c7eaa 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.h +++ b/lib/Target/AMDGPU/SIFrameLowering.h @@ -60,6 +60,10 @@ private: /// \brief Emits debugger prologue. void emitDebuggerPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const; + +public: + bool hasFP(const MachineFunction &MF) const override; + bool hasSP(const MachineFunction &MF) const; }; } // end namespace llvm diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 71f07bad347..3203c38dae3 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -86,6 +86,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3; ScratchWaveOffsetReg = AMDGPU::SGPR4; FrameOffsetReg = AMDGPU::SGPR5; + StackPtrOffsetReg = AMDGPU::SGPR32; return; } diff --git a/test/CodeGen/AMDGPU/callee-frame-setup.ll b/test/CodeGen/AMDGPU/callee-frame-setup.ll new file mode 100644 index 00000000000..6c3594bb44e --- /dev/null +++ b/test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI %s + +; GCN-LABEL: {{^}}callee_no_stack: +; GCN: ; BB#0: +; GCN-NEXT: s_waitcnt +; GCN-NEXT: s_setpc_b64 +define void @callee_no_stack() #0 { + ret void +} + +; Requires frame pointer for access to local regular object. + +; GCN-LABEL: {{^}}callee_with_stack: +; GCN: ; BB#0: +; GCN-NEXT: s_waitcnt +; GCN-NEXT: s_mov_b32 s5, s32 +; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}} +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s5 offset:4{{$}} +; GCN-NEXT: s_waitcnt +; GCN-NEXT: s_setpc_b64 +define void @callee_with_stack() #0 { + %alloca = alloca i32 + store volatile i32 0, i32* %alloca + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/frame-index-elimination.ll b/test/CodeGen/AMDGPU/frame-index-elimination.ll index b49d8e2d89d..25cbb7b105f 100644 --- a/test/CodeGen/AMDGPU/frame-index-elimination.ll +++ b/test/CodeGen/AMDGPU/frame-index-elimination.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; Test that non-entry function frame indices are expanded properly to ; give an index relative to the scratch wave offset register @@ -71,8 +71,9 @@ define void @func_load_private_arg_i32_ptr(i32* %ptr) #0 { ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr: ; GCN: s_waitcnt -; GCN-NEXT: s_sub_u32 s6, s5, s4 -; GCN-NEXT: v_lshr_b32_e64 v0, s6, 6 +; GCN-NEXT: s_mov_b32 s5, s32 +; GCN-NEXT: s_sub_u32 [[SUB:s[0-9]+]], s5, s4 +; GCN-NEXT: v_lshr_b32_e64 v0, [[SUB]], 6 ; GCN-NEXT: v_add_i32_e32 v0, vcc, 4, v0 ; GCN-NOT: v_mov ; GCN: ds_write_b32 v0, v0 @@ -86,6 +87,7 @@ define void @void_func_byval_struct_i8_i32_ptr({ i8, i32 }* byval %arg0) #0 { ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr_value: ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s5, s32 ; GCN-NEXT: buffer_load_ubyte v0, off, s[0:3], s5 ; GCN_NEXT: buffer_load_dword v1, off, s[0:3], s5 offset:4 define void @void_func_byval_struct_i8_i32_ptr_value({ i8, i32 }* byval %arg0) #0 {