[AMDGPU] Select AGPR in PHI operand legalization

author Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>

Mon, 21 Oct 2019 19:25:27 +0000 (19:25 +0000)

committer Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>

Mon, 21 Oct 2019 19:25:27 +0000 (19:25 +0000)
author Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Mon, 21 Oct 2019 19:25:27 +0000 (19:25 +0000)
committer Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Mon, 21 Oct 2019 19:25:27 +0000 (19:25 +0000)
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp

index 88dc938e2b88d705d42d8001bfb20a704ec21870..57c271a1a6d31988451bc2fa353a6f084ef75d58 100644 (file)
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4576,6 +4576,10 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
            VRC = RI.hasAGPRs(getOpRegClass(MI, 0))
                      ? RI.getEquivalentAGPRClass(SRC)
                      : RI.getEquivalentVGPRClass(SRC);
+      } else {
+          VRC = RI.hasAGPRs(getOpRegClass(MI, 0))
+                    ? RI.getEquivalentAGPRClass(VRC)
+                    : RI.getEquivalentVGPRClass(VRC);
        }
        RC = VRC;
      } else {
diff --git a/test/CodeGen/AMDGPU/mfma-loop.ll b/test/CodeGen/AMDGPU/mfma-loop.ll

index 02f7c9bcee7124a1395f68b2d0379322a25a19ab..a67aadfcd27c5be22f31e079d1f70d541fea4008 100644 (file)
--- a/test/CodeGen/AMDGPU/mfma-loop.ll
+++ b/test/CodeGen/AMDGPU/mfma-loop.ll
@@ -1,13 +1,64 @@
  ; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
  
  ; GCN-LABEL: {{^}}test_mfma_loop_zeroinit:
-; GCN-COUNT32: v_accvgpr_write_b32
+
+; Check that we do not use 32 temp vgprs, but rotate 3 vgprs only.
+; 3 vgprs are needed to avoid wait states between writes.
+
+; FIXME: We should not be using any temporary registers at all.
+; At the moment we initialize an sgpr, then copy it via vgprs.
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2:v[0-9]+]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3:v[0-9]+]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1:v[0-9]+]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; Check that we do not copy agprs to vgprs and back inside the loop.
+
  ; GCN: [[LOOP:BB[0-9_]+]]:
  ; GCN-NOT: v_accvgpr
  ; GCN: v_mfma_f32_32x32x1f32
  ; GCN-NOT: v_accvgpr
  ; GCN: s_cbranch_scc1 [[LOOP]]
+
+; Final result should be read only once after the loop.
+
  ; GCN-COUNT32: v_accvgpr_read_b32
+
  define amdgpu_kernel void @test_mfma_loop_zeroinit(<32 x float> addrspace(1)* %arg) {
  entry:
    br label %for.cond.preheader
author	Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
	Mon, 21 Oct 2019 19:25:27 +0000 (19:25 +0000)
committer	Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
	Mon, 21 Oct 2019 19:25:27 +0000 (19:25 +0000)
lib/Target/AMDGPU/SIInstrInfo.cpp		patch \| blob \| history
test/CodeGen/AMDGPU/mfma-loop.ll		patch \| blob \| history