From e2ff20cbface83819e447eef1345ddaa9bfec441 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Fri, 29 Sep 2017 14:31:39 +0000 Subject: [PATCH] [SystemZ] implement shouldCoalesce() Implement shouldCoalesce() to help regalloc avoid running out of GR128 registers. If a COPY involving a subreg of a GR128 is coalesced, the live range of the GR128 virtual register will be extended. If this happens where there are enough phys-reg clobbers present, regalloc will run out of registers (if there is not a single GR128 allocatable register available). This patch tries to allow coalescing only when it can prove that this will be safe by checking the (local) interval in question. Review: Ulrich Weigand, Quentin Colombet https://reviews.llvm.org/D37899 https://bugs.llvm.org/show_bug.cgi?id=34610 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@314516 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetRegisterInfo.h | 4 +- lib/CodeGen/RegisterCoalescer.cpp | 2 +- lib/Target/AMDGPU/SIRegisterInfo.cpp | 3 +- lib/Target/AMDGPU/SIRegisterInfo.h | 4 +- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 3 +- lib/Target/ARM/ARMBaseRegisterInfo.h | 5 +- lib/Target/SystemZ/SystemZRegisterInfo.cpp | 67 ++++++++++++++++++++++ lib/Target/SystemZ/SystemZRegisterInfo.h | 12 ++++ test/CodeGen/SystemZ/regalloc-GR128.ll | 18 ++++++ 9 files changed, 112 insertions(+), 6 deletions(-) create mode 100644 test/CodeGen/SystemZ/regalloc-GR128.ll diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h index afa6a89a890..b2f6f991ae5 100644 --- a/include/llvm/Target/TargetRegisterInfo.h +++ b/include/llvm/Target/TargetRegisterInfo.h @@ -40,6 +40,7 @@ class MachineFunction; class MachineInstr; class RegScavenger; class VirtRegMap; +class LiveIntervals; class TargetRegisterClass { public: @@ -959,7 +960,8 @@ public: unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, - const TargetRegisterClass *NewRC) const + const 
TargetRegisterClass *NewRC, + LiveIntervals &LIS) const { return true; } //===--------------------------------------------------------------------===// diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 255d17078a1..152149ac9a7 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -1583,7 +1583,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { std::swap(SrcRC, DstRC); } if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx, - CP.getNewRC())) { + CP.getNewRC(), *LIS)) { DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n"); return false; } diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index 7c73f92eed2..a367bd7e129 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1474,7 +1474,8 @@ bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI, unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, - const TargetRegisterClass *NewRC) const { + const TargetRegisterClass *NewRC, + LiveIntervals &LIS) const { unsigned SrcSize = getRegSizeInBits(*SrcRC); unsigned DstSize = getRegSizeInBits(*DstRC); unsigned NewSize = getRegSizeInBits(*NewRC); diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h index 65655b79c21..bf814b6974a 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/lib/Target/AMDGPU/SIRegisterInfo.h @@ -22,6 +22,7 @@ namespace llvm { +class LiveIntervals; class MachineRegisterInfo; class SISubtarget; class SIMachineFunctionInfo; @@ -212,7 +213,8 @@ public: unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, - const TargetRegisterClass *NewRC) const override; + const TargetRegisterClass *NewRC, + LiveIntervals &LIS) const override; unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp 
b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 17269268112..bf39aebaf44 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -807,7 +807,8 @@ bool ARMBaseRegisterInfo::shouldCoalesce(MachineInstr *MI, unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, - const TargetRegisterClass *NewRC) const { + const TargetRegisterClass *NewRC, + LiveIntervals &LIS) const { auto MBB = MI->getParent(); auto MF = MBB->getParent(); const MachineRegisterInfo &MRI = MF->getRegInfo(); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 2e91d9d4be2..a8e947184ea 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -27,6 +27,8 @@ namespace llvm { +class LiveIntervals; + /// Register allocation hints. namespace ARMRI { @@ -204,7 +206,8 @@ public: unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, - const TargetRegisterClass *NewRC) const override; + const TargetRegisterClass *NewRC, + LiveIntervals &LIS) const override; }; } // end namespace llvm diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index d14a0fb0b0b..05f93ce5162 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -10,6 +10,7 @@ #include "SystemZRegisterInfo.h" #include "SystemZInstrInfo.h" #include "SystemZSubtarget.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetFrameLowering.h" @@ -152,6 +153,72 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); } +bool SystemZRegisterInfo::shouldCoalesce(MachineInstr *MI, + const TargetRegisterClass *SrcRC, + unsigned SubReg, + const TargetRegisterClass *DstRC, + unsigned DstSubReg, + const TargetRegisterClass 
*NewRC, + LiveIntervals &LIS) const { + assert (MI->isCopy() && "Only expecting COPY instructions"); + + // Coalesce anything which is not a COPY involving a subreg to/from GR128. + if (!(NewRC->hasSuperClassEq(&SystemZ::GR128BitRegClass) && + (getRegSizeInBits(*SrcRC) <= 64 || getRegSizeInBits(*DstRC) <= 64))) + return true; + + // Allow coalescing of a GR128 subreg COPY only if the live ranges are small + // and local to one MBB with not too many interfering registers. Otherwise + // regalloc may run out of registers. + + unsigned WideOpNo = (getRegSizeInBits(*SrcRC) == 128 ? 1 : 0); + unsigned GR128Reg = MI->getOperand(WideOpNo).getReg(); + unsigned GRNarReg = MI->getOperand((WideOpNo == 1) ? 0 : 1).getReg(); + LiveInterval &IntGR128 = LIS.getInterval(GR128Reg); + LiveInterval &IntGRNar = LIS.getInterval(GRNarReg); + + // Check that the two virtual registers are local to MBB. + MachineBasicBlock *MBB = MI->getParent(); + if (LIS.isLiveInToMBB(IntGR128, MBB) || LIS.isLiveOutOfMBB(IntGR128, MBB) || + LIS.isLiveInToMBB(IntGRNar, MBB) || LIS.isLiveOutOfMBB(IntGRNar, MBB)) + return false; + + // Find the first and last MIs of the registers. + MachineInstr *FirstMI = nullptr, *LastMI = nullptr; + if (WideOpNo == 1) { + FirstMI = LIS.getInstructionFromIndex(IntGR128.beginIndex()); + LastMI = LIS.getInstructionFromIndex(IntGRNar.endIndex()); + } else { + FirstMI = LIS.getInstructionFromIndex(IntGRNar.beginIndex()); + LastMI = LIS.getInstructionFromIndex(IntGR128.endIndex()); + } + assert (FirstMI && LastMI && "No instruction from index?"); + + // Check if coalescing seems safe by finding the set of clobbered physreg + // pairs in the region. 
+ BitVector PhysClobbered(getNumRegs()); + MachineBasicBlock::iterator MII = FirstMI, MEE = LastMI; + MEE++; + for (; MII != MEE; ++MII) { + for (const MachineOperand &MO : MII->operands()) + if (MO.isReg() && isPhysicalRegister(MO.getReg())) { + for (MCSuperRegIterator SI(MO.getReg(), this, true/*IncludeSelf*/); + SI.isValid(); ++SI) + if (NewRC->contains(*SI)) { + PhysClobbered.set(*SI); + break; + } + } + } + + // Demand an arbitrary margin of free regs. + unsigned const DemandedFreeGR128 = 3; + if (PhysClobbered.count() > (NewRC->getNumRegs() - DemandedFreeGR128)) + return false; + + return true; +} + unsigned SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const SystemZFrameLowering *TFI = getFrameLowering(MF); diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index e41c06c98af..8b690e6da9f 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -18,6 +18,8 @@ namespace llvm { +class LiveIntervals; + namespace SystemZ { // Return the subreg to use for referring to the even and odd registers // in a GR128 pair. Is32Bit says whether we want a GR32 or GR64. @@ -59,6 +61,16 @@ public: void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override; + + /// \brief SrcRC and DstRC will be morphed into NewRC if this returns true. 
+ bool shouldCoalesce(MachineInstr *MI, + const TargetRegisterClass *SrcRC, + unsigned SubReg, + const TargetRegisterClass *DstRC, + unsigned DstSubReg, + const TargetRegisterClass *NewRC, + LiveIntervals &LIS) const override; + unsigned getFrameRegister(const MachineFunction &MF) const override; }; diff --git a/test/CodeGen/SystemZ/regalloc-GR128.ll b/test/CodeGen/SystemZ/regalloc-GR128.ll new file mode 100644 index 00000000000..e84e23613d9 --- /dev/null +++ b/test/CodeGen/SystemZ/regalloc-GR128.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -O3 -o /dev/null +; +; Test that regalloc does not run out of registers + +; This test will include a GR128 virtual reg. +define void @test0(i64 %dividend, i64 %divisor) { + %rem = urem i64 %dividend, %divisor + call void asm sideeffect "", "{r0},{r1},{r2},{r3},{r4},{r5},{r6},{r7},{r8},{r9},{r10},{r11},{r12},{r13},{r14}"(i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 %rem) + ret void +} + +; This test will include an ADDR128 virtual reg. +define i64 @test1(i64 %dividend, i64 %divisor) { +%rem = urem i64 %dividend, %divisor +call void asm sideeffect "", "{r2},{r3},{r4},{r5},{r6},{r7},{r8},{r9},{r10},{r11},{r12},{r13},{r14}"(i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 %rem) +%ret = add i64 %rem, 1 +ret i64 %ret +} -- 2.40.0