From a2a1a4f194af9274ce6719a4970ffa8b29bdefde Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Wed, 6 Jul 2016 08:35:17 +0000 Subject: [PATCH] AMDGPU: Fix return of non-void-returning shaders Summary: Since "AMDGPU: Fix verifier errors in SILowerControlFlow", the logic that ensures that a non-void-returning shader falls off the end of the last basic block was effectively disabled, since SI_RETURN is now used. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96731 Reviewers: arsenm, tstellarAMD Subscribers: arsenm, kzhuravl, llvm-commits Differential Revision: http://reviews.llvm.org/D21975 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@274612 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/SILowerControlFlow.cpp | 10 ++++------ test/CodeGen/AMDGPU/ret_jump.ll | 3 ++- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp index ae23a96b621..f65d7d87aae 100644 --- a/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -729,14 +729,13 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { break; - case AMDGPU::S_ENDPGM: { - if (MF.getInfo<SIMachineFunctionInfo>()->returnsVoid()) - break; + case AMDGPU::SI_RETURN: { + assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid()); // Graphics shaders returning non-void shouldn't contain S_ENDPGM, // because external bytecode will be appended at the end. if (BI != --MF.end() || I != MBB.getFirstTerminator()) { - // S_ENDPGM is not the last instruction. Add an empty block at + // SI_RETURN is not the last instruction. Add an empty block at // the end and jump there. 
if (!EmptyMBBAtEnd) { EmptyMBBAtEnd = MF.CreateMachineBasicBlock(); @@ -746,9 +745,8 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { MBB.addSuccessor(EmptyMBBAtEnd); BuildMI(*BI, I, MI.getDebugLoc(), TII->get(AMDGPU::S_BRANCH)) .addMBB(EmptyMBBAtEnd); + I->eraseFromParent(); } - - I->eraseFromParent(); break; } } diff --git a/test/CodeGen/AMDGPU/ret_jump.ll b/test/CodeGen/AMDGPU/ret_jump.ll index 1e1a757ec51..f7380cd9692 100644 --- a/test/CodeGen/AMDGPU/ret_jump.ll +++ b/test/CodeGen/AMDGPU/ret_jump.ll @@ -12,10 +12,11 @@ ; GCN-NEXT: ; mask branch [[UNREACHABLE_BB:BB[0-9]+_[0-9]+]] ; GCN: [[RET_BB]]: -; GCN-NEXT: ; return +; GCN-NEXT: s_branch [[FINAL_BB:BB[0-9]+_[0-9]+]] ; GCN-NEXT: [[UNREACHABLE_BB]]: ; GCN-NEXT: s_or_b64 exec, exec, [[XOR_EXEC]] +; GCN-NEXT: [[FINAL_BB]]: ; GCN-NEXT: .Lfunc_end0 define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <8 x i32>] addrspace(2)* byval, i32 addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: -- 2.50.1