]> granicus.if.org Git - llvm/commitdiff
[AMDGPU] Select AGPR in PHI operand legalization
authorStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Mon, 21 Oct 2019 19:25:27 +0000 (19:25 +0000)
committerStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Mon, 21 Oct 2019 19:25:27 +0000 (19:25 +0000)
If a PHI defines AGPR legalize its operands to AGPR.
At the moment we can get an AGPR PHI with VGPR operands.
I am not aware of any problems as it seems to be handled
gracefully in RA, but this is not right anyway.

It also slightly decreases VGPR pressure in some cases
because we do not have to a copy via VGPR.

Differential Revision: https://reviews.llvm.org/D69206

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375446 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/AMDGPU/SIInstrInfo.cpp
test/CodeGen/AMDGPU/mfma-loop.ll

index 88dc938e2b88d705d42d8001bfb20a704ec21870..57c271a1a6d31988451bc2fa353a6f084ef75d58 100644 (file)
@@ -4576,6 +4576,10 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
           VRC = RI.hasAGPRs(getOpRegClass(MI, 0))
                     ? RI.getEquivalentAGPRClass(SRC)
                     : RI.getEquivalentVGPRClass(SRC);
+      } else {
+          VRC = RI.hasAGPRs(getOpRegClass(MI, 0))
+                    ? RI.getEquivalentAGPRClass(VRC)
+                    : RI.getEquivalentVGPRClass(VRC);
       }
       RC = VRC;
     } else {
index 02f7c9bcee7124a1395f68b2d0379322a25a19ab..a67aadfcd27c5be22f31e079d1f70d541fea4008 100644 (file)
@@ -1,13 +1,64 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
 ; GCN-LABEL: {{^}}test_mfma_loop_zeroinit:
-; GCN-COUNT32: v_accvgpr_write_b32
+
+; Check that we do not use 32 temp vgprs, but rotate 3 vgprs only.
+; 3 vgprs are needed to avoid wait states between writes.
+
+; FIXME: We should not be using and temporary registers at all.
+; At the moment we initialize an sgpr, then copy it via vgprs.
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2:v[0-9]+]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3:v[0-9]+]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1:v[0-9]+]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP1]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
+
+; Check that we do not copy agprs to vgprs and back inside the loop.
+
 ; GCN: [[LOOP:BB[0-9_]+]]:
 ; GCN-NOT: v_accvgpr
 ; GCN: v_mfma_f32_32x32x1f32
 ; GCN-NOT: v_accvgpr
 ; GCN: s_cbranch_scc1 [[LOOP]]
+
+; Final result should be read only once after the loop.
+
 ; GCN-COUNT32: v_accvgpr_read_b32
+
 define amdgpu_kernel void @test_mfma_loop_zeroinit(<32 x float> addrspace(1)* %arg) {
 entry:
   br label %for.cond.preheader