granicus.if.org Git - llvm/commitdiff
AMDGPU/GFX10: Apply the VMEM-to-scalar-write hazard also to writes to EXEC
author: Nicolai Haehnle <nhaehnle@gmail.com>
Wed, 17 Jul 2019 11:22:57 +0000 (11:22 +0000)
committer: Nicolai Haehnle <nhaehnle@gmail.com>
Wed, 17 Jul 2019 11:22:57 +0000 (11:22 +0000)
Summary: Change-Id: I854fbf7d48e937bef9f8f3f5d0c8aeb970652630

Reviewers: rampitec, mareko

Subscribers: arsenm, kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64807

Change-Id: I4405b3a7f84186acea5a78d291bff71056e745fc

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366314 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/AMDGPU/GCNHazardRecognizer.cpp
test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll
test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir
test/CodeGen/AMDGPU/wave32.ll

index a23348e18f92da43bc65e10c2ba0baec0f1ba3fe..885239e2faed35dff00ff6d43165119c6bc8875a 100644 (file)
@@ -920,7 +920,7 @@ bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
 
     for (const MachineOperand &Def : MI->defs()) {
       MachineOperand *Op = I->findRegisterUseOperand(Def.getReg(), false, TRI);
-      if (!Op || (Op->isImplicit() && Op->getReg() == AMDGPU::EXEC))
+      if (!Op)
         continue;
       return true;
     }
index f35b0b43d3694e16f31eae0755bec0eb8aba7b53..0f04c0c445f53094124e61a2b96f258f275f7386 100644 (file)
@@ -14,6 +14,7 @@ define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) {
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    global_store_dword v[0:1], v0, off
 ; GCN-NEXT:  BB0_2: ; %bb
+; GCN-NEXT:    v_nop
 ; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    global_store_dword v[0:1], v0, off
index 630070c13a3108c7981f67b16040dfcfd184aeeb..9d45c5b19e656d1eae005b2148a55b75708273a7 100644 (file)
@@ -92,6 +92,7 @@ body:             |
 ...
 # GCN-LABEL: name: vmem_write_exec_impread
 # GCN:      BUFFER_LOAD_DWORD_OFFEN
+# GCN:      V_NOP
 # GCN-NEXT: S_MOV_B64
 ---
 name:            vmem_write_exec_impread
@@ -208,3 +209,16 @@ body:             |
     $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
     S_BRANCH %bb.0
 ...
+# GCN-LABEL: name: ds_write_exec
+# GCN:      DS_WRITE_B32_gfx9
+# GCN-NEXT: V_NOP
+# GCN-NEXT: S_MOV_B32
+---
+name:            ds_write_exec
+body:             |
+  bb.0:
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    DS_WRITE_B32_gfx9 $vgpr0, $vgpr1, 0, 0, implicit $exec
+    $exec_lo = S_MOV_B32 -1
+...
index 781ebbb268fec1574a2f2b6af55ce6ab5b0992b1..a71ca5db7658dda1cb0378563593e80644993d54 100644 (file)
@@ -1073,6 +1073,7 @@ declare void @external_void_func_void() #1
 ; GFX1064-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
 ; GFX1032-NEXT: s_or_saveexec_b32 [[COPY_EXEC0:s[0-9]]], -1{{$}}
 ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-NEXT: v_nop
 ; GFX1064-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
 ; GFX1032-NEXT: s_mov_b32 exec_lo, [[COPY_EXEC0]]
 
@@ -1095,6 +1096,7 @@ declare void @external_void_func_void() #1
 ; GFX1064: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
 ; GFX1032: s_or_saveexec_b32 [[COPY_EXEC1:s[0-9]]], -1{{$}}
 ; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-NEXT: v_nop
 ; GFX1064-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
 ; GFX1032-NEXT: s_mov_b32 exec_lo, [[COPY_EXEC1]]
 ; GCN-NEXT: s_waitcnt vmcnt(0)