From 4e75329928c91f67ec82cb672497d5cf019f905f Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Wed, 17 Jul 2019 11:22:57 +0000 Subject: [PATCH] AMDGPU/GFX10: Apply the VMEM-to-scalar-write hazard also to writes to EXEC Summary: Change-Id: I854fbf7d48e937bef9f8f3f5d0c8aeb970652630 Reviewers: rampitec, mareko Subscribers: arsenm, kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64807 Change-Id: I4405b3a7f84186acea5a78d291bff71056e745fc git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366314 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 2 +- .../AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll | 1 + test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir | 14 ++++++++++++++ test/CodeGen/AMDGPU/wave32.ll | 2 ++ 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index a23348e18f9..885239e2fae 100644 --- a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -920,7 +920,7 @@ bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) { for (const MachineOperand &Def : MI->defs()) { MachineOperand *Op = I->findRegisterUseOperand(Def.getReg(), false, TRI); - if (!Op || (Op->isImplicit() && Op->getReg() == AMDGPU::EXEC)) + if (!Op) continue; return true; } diff --git a/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll b/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll index f35b0b43d36..0f04c0c445f 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll +++ b/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll @@ -14,6 +14,7 @@ define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) { ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: global_store_dword v[0:1], v0, off ; GCN-NEXT: BB0_2: ; %bb +; GCN-NEXT: v_nop ; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: global_store_dword v[0:1], v0, off diff --git a/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir b/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir index 630070c13a3..9d45c5b19e6 100644 --- a/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir +++ b/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir @@ -92,6 +92,7 @@ body: | ... # GCN-LABEL: name: vmem_write_exec_impread # GCN: BUFFER_LOAD_DWORD_OFFEN +# GCN: V_NOP # GCN-NEXT: S_MOV_B64 --- name: vmem_write_exec_impread @@ -208,3 +209,16 @@ body: | $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec S_BRANCH %bb.0 ... +# GCN-LABEL: name: ds_write_exec +# GCN: DS_WRITE_B32_gfx9 +# GCN-NEXT: V_NOP +# GCN-NEXT: S_MOV_B32 +--- +name: ds_write_exec +body: | + bb.0: + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + DS_WRITE_B32_gfx9 $vgpr0, $vgpr1, 0, 0, implicit $exec + $exec_lo = S_MOV_B32 -1 +... diff --git a/test/CodeGen/AMDGPU/wave32.ll b/test/CodeGen/AMDGPU/wave32.ll index 781ebbb268f..a71ca5db765 100644 --- a/test/CodeGen/AMDGPU/wave32.ll +++ b/test/CodeGen/AMDGPU/wave32.ll @@ -1073,6 +1073,7 @@ declare void @external_void_func_void() #1 ; GFX1064-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GFX1032-NEXT: s_or_saveexec_b32 [[COPY_EXEC0:s[0-9]]], -1{{$}} ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill +; GCN-NEXT: v_nop ; GFX1064-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; GFX1032-NEXT: s_mov_b32 exec_lo, [[COPY_EXEC0]] @@ -1095,6 +1096,7 @@ declare void @external_void_func_void() #1 ; GFX1064: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GFX1032: s_or_saveexec_b32 [[COPY_EXEC1:s[0-9]]], -1{{$}} ; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload +; GCN-NEXT: v_nop ; GFX1064-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GFX1032-NEXT: s_mov_b32 exec_lo, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) -- 2.40.0