granicus.if.org Git - llvm/commitdiff
[AMDGPU] Avoid predicated execution of the basic blocks containing scalar
authorAlexander Timofeev <Alexander.Timofeev@amd.com>
Tue, 3 Oct 2017 18:55:36 +0000 (18:55 +0000)
committerAlexander Timofeev <Alexander.Timofeev@amd.com>
Tue, 3 Oct 2017 18:55:36 +0000 (18:55 +0000)
instructions.

Differential revision: https://reviews.llvm.org/D38293

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@314828 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/AMDGPU/SIInsertSkips.cpp
test/CodeGen/AMDGPU/readlane_exec0.mir [new file with mode: 0644]

index ba346d2fad02cc71e5b32f5b79681c376973ce07..9bd58c45ce05c7c445297e5bbae7a38b52e719ce 100644 (file)
@@ -132,6 +132,16 @@ bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
           I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
         return true;
 
+      // The destination register of V_READFIRSTLANE/V_READLANE may be used as
+      // an operand by some SALU instruction. If the exec mask is zero, the
+      // vector instruction defining that register is not executed, and the
+      // scalar instruction will then operate on undefined data. For
+      // V_READFIRSTLANE/V_READLANE we should avoid predicated execution.
+      if ((I->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) ||
+          (I->getOpcode() == AMDGPU::V_READLANE_B32)) {
+        return true;
+      }
+
       if (I->isInlineAsm()) {
         const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
         const char *AsmStr = I->getOperand(0).getSymbolName();
diff --git a/test/CodeGen/AMDGPU/readlane_exec0.mir b/test/CodeGen/AMDGPU/readlane_exec0.mir
new file mode 100644 (file)
index 0000000..b6d58d7
--- /dev/null
@@ -0,0 +1,32 @@
+# RUN: llc -o - %s -march=amdgcn -mcpu=fiji  -run-pass=si-insert-skips -verify-machineinstrs | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: readlane_exec0
+# GCN: bb.0
+# GCN: S_CBRANCH_EXECZ %bb.2
+
+---
+name: readlane_exec0
+
+body:       |
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: %vgpr1_vgpr2:0x00000001, %vgpr2_vgpr3:0x00000003
+
+    %vgpr4 = V_AND_B32_e32 1, %vgpr1, implicit %exec
+    V_CMP_EQ_U32_e32 1, killed %vgpr4, implicit-def %vcc, implicit %exec
+    %sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed %vcc, implicit-def %exec, implicit-def %scc, implicit %exec
+    SI_MASK_BRANCH %bb.2, implicit %exec
+    S_BRANCH %bb.1
+
+  bb.1:
+
+   %sgpr10 = V_READFIRSTLANE_B32 %vgpr2, implicit %exec
+   %sgpr11 = V_READFIRSTLANE_B32 %vgpr3, implicit %exec
+   %sgpr10 = S_LOAD_DWORD_IMM killed %sgpr10_sgpr11, 0, 0
+   S_WAITCNT 127
+   %vgpr0 = V_XOR_B32_e32 killed %sgpr10, killed %vgpr0, implicit %exec
+
+  bb.2:
+
+    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
+...