From 4e0c4fb9c152a1e606bb30fccb37891c795bf66b Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Tue, 5 Sep 2017 18:36:36 +0000
Subject: [PATCH] AMDGPU: Fix not accounting for tail call resource usage

If the only call in a function is a tail call, the function isn't considered to have a call since it's a type of return.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@312561 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp |  3 ++-
 test/CodeGen/AMDGPU/sibling-call.ll    | 31 ++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index d80b537a910..0facae0992b 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -500,7 +500,8 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
 
   // If there are no calls, MachineRegisterInfo can tell us the used register
   // count easily.
-  if (!FrameInfo.hasCalls()) {
+  // A tail call isn't considered a call for MachineFrameInfo's purposes.
+  if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
     MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
     for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
       if (MRI.isPhysRegUsed(Reg)) {
diff --git a/test/CodeGen/AMDGPU/sibling-call.ll b/test/CodeGen/AMDGPU/sibling-call.ll
index 08c6dc9dbc1..deb59d0866b 100644
--- a/test/CodeGen/AMDGPU/sibling-call.ll
+++ b/test/CodeGen/AMDGPU/sibling-call.ll
@@ -11,6 +11,22 @@ define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 {
   ret i32 %add0
 }
 
+; GCN-LABEL: {{^}}i32_fastcc_i32_i32_stack_object:
+; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN: s_mov_b32 s5, s32
+; GCN: v_add_i32_e32 v0, vcc, v1, v
+; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:24
+; GCN: s_waitcnt vmcnt(0)
+; GCN: s_setpc_b64
+; GCN: ; ScratchSize: 68
+define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 {
+  %alloca = alloca [16 x i32], align 4
+  %gep = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 5
+  store volatile i32 9, i32* %gep
+  %add0 = add i32 %arg0, %arg1
+  ret i32 %add0
+}
+
 ; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_i32:
 define fastcc i32 @sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %c) #1 {
 entry:
@@ -22,6 +38,7 @@ entry:
 ; GCN: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
 ; GCN: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:24
 ; GCN: s_setpc_b64
+; GCN: ; ScratchSize: 68
 define fastcc i32 @sibling_call_i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b, i32 %c) #1 {
 entry:
   %alloca = alloca [16 x i32], align 4
@@ -31,6 +48,20 @@ entry:
   ret i32 %ret
 }
 
+; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_i32_callee_stack_object:
+; GCN: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
+; GCN: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:24
+; GCN: s_setpc_b64
+; GCN: ; ScratchSize: 136
+define fastcc i32 @sibling_call_i32_fastcc_i32_i32_callee_stack_object(i32 %a, i32 %b, i32 %c) #1 {
+entry:
+  %alloca = alloca [16 x i32], align 4
+  %gep = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 5
+  store volatile i32 9, i32* %gep
+  %ret = tail call fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b)
+  ret i32 %ret
+}
+
 ; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_i32_unused_result:
 define fastcc void @sibling_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) #1 {
 entry:
-- 
2.40.0