From bcd8c96d2e0d7a69816f5d04f21da9732ca7e3b1 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin
Date: Fri, 10 Feb 2017 02:07:58 +0000
Subject: [PATCH] [AMDGPU] Override PSet for M0

This change returns empty PSet list for M0 register. Otherwise its
PSet as defined by tablegen is SReg_32. This results in incorrect
register pressure calculation every time an instruction uses M0.
Such uses count as SReg_32 PSet and inadequately increase pressure
on SGPRs.

Differential Revision: https://reviews.llvm.org/D29798

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294691 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AMDGPU/SIRegisterInfo.cpp         |  8 +++
 lib/Target/AMDGPU/SIRegisterInfo.h           |  2 +
 test/CodeGen/AMDGPU/schedule-regpressure.mir | 57 ++++++++++++++++++++
 3 files changed, 67 insertions(+)
 create mode 100644 test/CodeGen/AMDGPU/schedule-regpressure.mir

diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 492a92a4d8a..00394623db4 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1329,3 +1329,11 @@ unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
 
   return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
 }
+
+const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
+  static const int Empty[] = { -1 };
+
+  if (hasRegUnit(AMDGPU::M0, RegUnit))
+    return Empty;
+  return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
+}
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h
index 4330e610681..c95492ffd22 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -211,6 +211,8 @@ public:
   unsigned getRegPressureSetLimit(const MachineFunction &MF,
                                   unsigned Idx) const override;
 
+  const int *getRegUnitPressureSets(unsigned RegUnit) const override;
+
 private:
   void buildSpillLoadStore(MachineBasicBlock::iterator MI,
                            unsigned LoadStoreOp,
diff --git a/test/CodeGen/AMDGPU/schedule-regpressure.mir b/test/CodeGen/AMDGPU/schedule-regpressure.mir
new file mode 100644
index 00000000000..c71de87eeec
--- /dev/null
+++ b/test/CodeGen/AMDGPU/schedule-regpressure.mir
@@ -0,0 +1,57 @@
+# RUN: llc -march=amdgcn -misched=converge -run-pass machine-scheduler %s -o - -debug-only=misched 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Check there is no SReg_32 pressure created by DS_* instructions because of M0 use
+
+# CHECK: ScheduleDAGMILive::schedule starting
+# CHECK: SU({{.*}} = DS_READ_B32 {{.*}} %M0, %EXEC
+# CHECK: Pressure Diff : {{$}}
+# CHECK: SU({{.*}} DS_WRITE_B32
+
+---
+name: mo_pset
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sreg_128 }
+  - { id: 1, class: sgpr_64 }
+  - { id: 2, class: sreg_32_xm0 }
+  - { id: 3, class: sgpr_32 }
+  - { id: 4, class: vgpr_32 }
+  - { id: 5, class: sreg_32_xm0_xexec }
+  - { id: 6, class: vgpr_32 }
+  - { id: 7, class: vgpr_32 }
+  - { id: 8, class: vgpr_32 }
+liveins:
+  - { reg: '%sgpr4_sgpr5', virtual-reg: '%1' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap: false
+  hasPatchPoint: false
+  stackSize: 0
+  offsetAdjustment: 0
+  maxAlignment: 0
+  adjustsStack: false
+  hasCalls: false
+  maxCallFrameSize: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart: false
+  hasMustTailInVarArgFunc: false
+body: |
+  bb.0:
+    liveins: %sgpr4_sgpr5
+
+    %1 = COPY %sgpr4_sgpr5
+    %5 = S_LOAD_DWORD_IMM %1, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
+    %m0 = S_MOV_B32 -1
+    %7 = COPY %5
+    %6 = DS_READ_B32 %7, 0, 0, implicit %m0, implicit %exec
+    DS_WRITE_B32 %7, %6, 4, 0, implicit killed %m0, implicit %exec
+    S_ENDPGM
+
+...
-- 
2.50.1