From 56db90276b450c62741bd70d72732d4bcc13884c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Apr 2017 22:29:28 +0000 Subject: [PATCH] AMDGPU: Refactor SIMachineFunctionInfo slightly Prepare for handling non-entry functions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@299999 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/SIISelLowering.cpp | 2 +- lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 40 +++++++++++++-------- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 12 +++++++ test/CodeGen/AMDGPU/calling-conventions.ll | 14 ++++---- 4 files changed, 45 insertions(+), 23 deletions(-) diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 4696f645cf8..eda825d8c6e 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1171,7 +1171,7 @@ SDValue SITargetLowering::LowerFormalArguments( *DAG.getContext()); bool IsShader = AMDGPU::isShader(CallConv); - bool IsKernel = !IsShader; + bool IsKernel = AMDGPU::isKernel(CallConv); bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CallConv); if (IsShader) { diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index debb42fc0fc..8e612d2ddfd 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -75,34 +75,48 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) PrivateMemoryInputPtr(false) { const SISubtarget &ST = MF.getSubtarget(); const Function *F = MF.getFunction(); + FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F); + WavesPerEU = ST.getWavesPerEU(*F); - PSInputAddr = AMDGPU::getInitialPSInputAddr(*F); + // Non-entry functions have no special inputs for now. + // TODO: Return early for non-entry CCs. 
- const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); + CallingConv::ID CC = F->getCallingConv(); + if (CC == CallingConv::AMDGPU_PS) + PSInputAddr = AMDGPU::getInitialPSInputAddr(*F); - if (!AMDGPU::isShader(F->getCallingConv())) { + if (AMDGPU::isKernel(CC)) { KernargSegmentPtr = true; WorkGroupIDX = true; WorkItemIDX = true; } - if (F->hasFnAttribute("amdgpu-work-group-id-y") || ST.debuggerEmitPrologue()) + if (ST.debuggerEmitPrologue()) { + // Enable everything. WorkGroupIDY = true; - - if (F->hasFnAttribute("amdgpu-work-group-id-z") || ST.debuggerEmitPrologue()) WorkGroupIDZ = true; - - if (F->hasFnAttribute("amdgpu-work-item-id-y") || ST.debuggerEmitPrologue()) WorkItemIDY = true; - - if (F->hasFnAttribute("amdgpu-work-item-id-z") || ST.debuggerEmitPrologue()) WorkItemIDZ = true; + } else { + if (F->hasFnAttribute("amdgpu-work-group-id-y")) + WorkGroupIDY = true; + + if (F->hasFnAttribute("amdgpu-work-group-id-z")) + WorkGroupIDZ = true; + + if (F->hasFnAttribute("amdgpu-work-item-id-y")) + WorkItemIDY = true; + + if (F->hasFnAttribute("amdgpu-work-item-id-z")) + WorkItemIDZ = true; + } // X, XY, and XYZ are the only supported combinations, so make sure Y is // enabled if Z is. if (WorkItemIDZ) WorkItemIDY = true; + const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); bool MaySpill = ST.isVGPRSpillingEnabled(*F); bool HasStackObjects = FrameInfo.hasStackObjects(); @@ -129,12 +143,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) // We don't need to worry about accessing spills with flat instructions. // TODO: On VI where we must use flat for global, we should be able to omit // this if it is never used for generic access. 
- if (HasStackObjects && ST.getGeneration() >= SISubtarget::SEA_ISLANDS && - ST.isAmdHsaOS()) + if (HasStackObjects && ST.hasFlatAddressSpace() && ST.isAmdHsaOS()) FlatScratchInit = true; - - FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F); - WavesPerEU = ST.getWavesPerEU(*F); } unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer( diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 4a43ecda177..d6c836eb748 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -258,6 +258,18 @@ bool isCompute(CallingConv::ID CC); LLVM_READNONE bool isEntryFunctionCC(CallingConv::ID CC); +// FIXME: Remove this when calling conventions are cleaned up +LLVM_READNONE +inline bool isKernel(CallingConv::ID CC) { + switch (CC) { + case CallingConv::C: + case CallingConv::AMDGPU_KERNEL: + case CallingConv::SPIR_KERNEL: + return true; + default: + return false; + } +} bool isSI(const MCSubtargetInfo &STI); bool isCI(const MCSubtargetInfo &STI); diff --git a/test/CodeGen/AMDGPU/calling-conventions.ll b/test/CodeGen/AMDGPU/calling-conventions.ll index 55c2b503430..677147b6f4e 100644 --- a/test/CodeGen/AMDGPU/calling-conventions.ll +++ b/test/CodeGen/AMDGPU/calling-conventions.ll @@ -12,13 +12,13 @@ entry: } ; FIXME: This is treated like a kernel -; GCN-LABEL: {{^}}func: -; GCN: s_endpgm -define spir_func void @func(i32 addrspace(1)* %out) { -entry: - store i32 0, i32 addrspace(1)* %out - ret void -} +; XGCN-LABEL: {{^}}func: +; XGCN: s_endpgm +; define spir_func void @func(i32 addrspace(1)* %out) { +; entry: +; store i32 0, i32 addrspace(1)* %out +; ret void +; } ; GCN-LABEL: {{^}}ps_ret_cc_f16: ; SI: v_cvt_f16_f32_e32 v0, v0 -- 2.50.1