]> granicus.if.org Git - llvm/commitdiff
[AMDGPU] Fix shouldClusterMemOps to process flat loads
authorStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Wed, 6 Sep 2017 15:31:30 +0000 (15:31 +0000)
committerStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Wed, 6 Sep 2017 15:31:30 +0000 (15:31 +0000)
Flat loads do not have vdata operand but have vdst instead.

Differential Revision: https://reviews.llvm.org/D37502

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@312640 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/AMDGPU/SIInstrInfo.cpp
test/CodeGen/AMDGPU/cluster-flat-loads.mir [new file with mode: 0644]

index ad9deefc1e7bea76f1c9e0610b14abc821935960..f7f6d52e751fb52ff5630b119f86febbbca5cc5a 100644 (file)
@@ -366,7 +366,11 @@ bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
       (isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt)) ||
       (isFLAT(FirstLdSt) && isFLAT(SecondLdSt))) {
     FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdata);
+    if (!FirstDst)
+      FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst);
     SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdata);
+    if (!SecondDst)
+      SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst);
   } else if (isSMRD(FirstLdSt) && isSMRD(SecondLdSt)) {
     FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::sdst);
     SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::sdst);
diff --git a/test/CodeGen/AMDGPU/cluster-flat-loads.mir b/test/CodeGen/AMDGPU/cluster-flat-loads.mir
new file mode 100644 (file)
index 0000000..50caba2
--- /dev/null
@@ -0,0 +1,20 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass machine-scheduler %s -o - | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: cluster_flat_loads
+# GCN:      FLAT_LOAD_DWORD %0, 0
+# GCN-NEXT: FLAT_LOAD_DWORD %0, 4
+# GCN-NEXT: V_ADD_F32_e64
+name: cluster_flat_loads
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vreg_64 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: vgpr_32 }
+  - { id: 3, class: vgpr_32 }
+body:             |
+  bb.0:
+    %0 = IMPLICIT_DEF
+    %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4)
+    %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit %exec
+    %3 = FLAT_LOAD_DWORD %0, 4, 0, 0, implicit %exec, implicit %flat_scr :: (load 4)
+...