From 5185caa70db4611c18ee6e6c2529dd5e345f0a15 Mon Sep 17 00:00:00 2001
From: Zaara Syeda
Date: Thu, 21 Sep 2017 16:12:33 +0000
Subject: [PATCH] [Power9] Spill gprs to vector registers rather than stack

This patch updates register allocation to enable spilling gprs to
volatile vector registers rather than the stack. It can be enabled
for Power9 with the option -ppc-enable-gpr-to-vsr-spills.
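
As a sketch of the intended effect (illustrative only, not output from
this patch; register numbers are hypothetical), a 64-bit gpr that would
previously be spilled to a stack slot:

  std 14, -8(1)    # store r14 to its stack slot
  ...
  ld 14, -8(1)     # reload r14 from the stack

can instead be held in a volatile VSR using direct moves:

  mtvsrd 2, 14     # spill: move r14 into a volatile VSR
  ...
  mffprd 14, 2     # reload: move it back into r14
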
Differential Revision: https://reviews.llvm.org/D34815

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313886 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/PowerPC/PPCInstrInfo.cpp    | 71 +++++++++++++++++++++++++-
 lib/Target/PowerPC/PPCInstrVSX.td      | 24 +++++++++
 lib/Target/PowerPC/PPCRegisterInfo.cpp | 22 ++++++++
 lib/Target/PowerPC/PPCRegisterInfo.td  |  5 ++
 test/CodeGen/PowerPC/gpr-vsr-spill.ll  | 24 +++++++++
 5 files changed, 145 insertions(+), 1 deletion(-)
 create mode 100644 test/CodeGen/PowerPC/gpr-vsr-spill.ll

diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index d65d17beca3..80530ec19be 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -46,6 +46,12 @@ using namespace llvm;
 #define GET_INSTRINFO_CTOR_DTOR
 #include "PPCGenInstrInfo.inc"
 
+STATISTIC(NumStoreSPILLVSRRCAsVec,
+          "Number of spillvsrrc spilled to stack as vec");
+STATISTIC(NumStoreSPILLVSRRCAsGpr,
+          "Number of spillvsrrc spilled to stack as gpr");
+STATISTIC(NumGPRtoVSRSpill, "Number of gpr spills to spillvsrrc");
+
 static cl::
 opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
             cl::desc("Disable analysis for CTR loops"));
@@ -280,6 +286,7 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
   case PPC::QVLFSXs:
   case PPC::QVLFDXb:
   case PPC::RESTORE_VRSAVE:
+  case PPC::SPILLTOVSR_LD:
     // Check for the operands added by addFrameReference (the immediate is the
     // offset which defaults to 0).
     if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
@@ -333,6 +340,7 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
   case PPC::QVSTFSXs:
   case PPC::QVSTFDXb:
   case PPC::SPILL_VRSAVE:
+  case PPC::SPILLTOVSR_ST:
     // Check for the operands added by addFrameReference (the immediate is the
     // offset which defaults to 0).
     if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
@@ -917,7 +925,18 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(SrcReg);
     getKillRegState(KillSrc);
     return;
-  }
+  } else if (PPC::G8RCRegClass.contains(SrcReg) &&
+             PPC::VSFRCRegClass.contains(DestReg)) {
+    BuildMI(MBB, I, DL, get(PPC::MTVSRD), DestReg).addReg(SrcReg);
+    NumGPRtoVSRSpill++;
+    getKillRegState(KillSrc);
+    return;
+  } else if (PPC::VSFRCRegClass.contains(SrcReg) &&
+             PPC::G8RCRegClass.contains(DestReg)) {
+    BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg);
+    getKillRegState(KillSrc);
+    return;
+  }
 
   unsigned Opc;
   if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
@@ -1061,6 +1080,11 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
                                        getKillRegState(isKill)),
                                FrameIdx));
     NonRI = true;
+  } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILLTOVSR_ST))
+                                           .addReg(SrcReg,
+                                                   getKillRegState(isKill)),
+                                       FrameIdx));
   } else {
     llvm_unreachable("Unknown regclass!");
   }
@@ -1182,6 +1206,9 @@ bool PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
     NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFDXb),
                                                DestReg), FrameIdx));
     NonRI = true;
+  } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILLTOVSR_LD),
+                                               DestReg), FrameIdx));
   } else {
     llvm_unreachable("Unknown regclass!");
   }
@@ -1995,6 +2022,48 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
     MI.setDesc(get(Opcode));
     return true;
   }
+  case PPC::SPILLTOVSR_LD: {
+    unsigned TargetReg = MI.getOperand(0).getReg();
+    if (PPC::VSFRCRegClass.contains(TargetReg)) {
+      MI.setDesc(get(PPC::DFLOADf64));
+      return expandPostRAPseudo(MI);
+    }
+    else
+      MI.setDesc(get(PPC::LD));
+    return true;
+  }
+  case PPC::SPILLTOVSR_ST: {
+    unsigned SrcReg = MI.getOperand(0).getReg();
+    if (PPC::VSFRCRegClass.contains(SrcReg)) {
+      NumStoreSPILLVSRRCAsVec++;
+      MI.setDesc(get(PPC::DFSTOREf64));
+      return expandPostRAPseudo(MI);
+    } else {
+      NumStoreSPILLVSRRCAsGpr++;
+      MI.setDesc(get(PPC::STD));
+    }
+    return true;
+  }
+  case PPC::SPILLTOVSR_LDX: {
+    unsigned TargetReg = MI.getOperand(0).getReg();
+    if (PPC::VSFRCRegClass.contains(TargetReg))
+      MI.setDesc(get(PPC::LXSDX));
+    else
+      MI.setDesc(get(PPC::LDX));
+    return true;
+  }
+  case PPC::SPILLTOVSR_STX: {
+    unsigned SrcReg = MI.getOperand(0).getReg();
+    if (PPC::VSFRCRegClass.contains(SrcReg)) {
+      NumStoreSPILLVSRRCAsVec++;
+      MI.setDesc(get(PPC::STXSDX));
+    } else {
+      NumStoreSPILLVSRRCAsGpr++;
+      MI.setDesc(get(PPC::STDX));
+    }
+    return true;
+  }
+
   case PPC::CFENCE8: {
     auto Val = MI.getOperand(0).getReg();
     BuildMI(MBB, MI, DL, get(PPC::CMPD), PPC::CR7).addReg(Val).addReg(Val);
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 53f107b4789..1fc50d2c860 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -47,6 +47,13 @@ def vssrc : RegisterOperand<VSSRC> {
   let ParserMatchClass = PPCRegVSSRCAsmOperand;
 }
 
+def PPCRegSPILLTOVSRRCAsmOperand : AsmOperandClass {
+  let Name = "RegSPILLTOVSRRC"; let PredicateMethod = "isVSRegNumber";
+}
+
+def spilltovsrrc : RegisterOperand<SPILLTOVSRRC> {
+  let ParserMatchClass = PPCRegSPILLTOVSRRCAsmOperand;
+}
 // Little-endian-specific nodes.
 def SDT_PPClxvd2x : SDTypeProfile<1, 1, [
   SDTCisVT<0, v2f64>, SDTCisPtrTy<1>
@@ -2863,6 +2870,23 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
             (f32 (DFLOADf32 ixaddr:$src))>;
 } // end HasP9Vector, AddedComplexity
 
+let Predicates = [HasP9Vector] in {
+  let isPseudo = 1 in {
+    let mayStore = 1 in {
+      def SPILLTOVSR_STX : Pseudo<(outs), (ins spilltovsrrc:$XT, memrr:$dst),
+                                  "#SPILLTOVSR_STX", []>;
+      def SPILLTOVSR_ST : Pseudo<(outs), (ins spilltovsrrc:$XT, memrix:$dst),
+                                 "#SPILLTOVSR_ST", []>;
+    }
+    let mayLoad = 1 in {
+      def SPILLTOVSR_LDX : Pseudo<(outs spilltovsrrc:$XT), (ins memrr:$src),
+                                  "#SPILLTOVSR_LDX", []>;
+      def SPILLTOVSR_LD : Pseudo<(outs spilltovsrrc:$XT), (ins memrix:$src),
+                                 "#SPILLTOVSR_LD", []>;
+
+    }
+  }
+}
 // Integer extend helper dags 32 -> 64
 def AnyExts {
   dag A = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32);
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 9207165c46a..af62066a174 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -21,6 +21,7 @@
 #include "PPCTargetMachine.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -49,6 +50,9 @@ using namespace llvm;
 #define GET_REGINFO_TARGET_DESC
 #include "PPCGenRegisterInfo.inc"
 
+STATISTIC(InflateGPRC, "Number of gprc inputs for getLargestLegalClass");
+STATISTIC(InflateGP8RC, "Number of g8rc inputs for getLargestLegalClass");
+
 static cl::opt<bool>
 EnableBasePointer("ppc-use-base-pointer", cl::Hidden, cl::init(true),
          cl::desc("Enable use of a base pointer for complex stack frames"));
@@ -57,6 +61,10 @@ static cl::opt<bool>
 AlwaysBasePointer("ppc-always-use-base-pointer", cl::Hidden, cl::init(false),
          cl::desc("Force the use of a base pointer in every function"));
 
+static cl::opt<bool>
+EnableGPRToVecSpills("ppc-enable-gpr-to-vsr-spills", cl::Hidden, cl::init(false),
+                     cl::desc("Enable spills from gpr to vsr rather than stack"));
+
 PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
   : PPCGenRegisterInfo(TM.isPPC64() ? PPC::LR8 : PPC::LR,
                        TM.isPPC64() ? 0 : 1,
@@ -82,6 +90,8 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
   // VSX
   ImmToIdxMap[PPC::DFLOADf32] = PPC::LXSSPX;
   ImmToIdxMap[PPC::DFLOADf64] = PPC::LXSDX;
+  ImmToIdxMap[PPC::SPILLTOVSR_LD] = PPC::SPILLTOVSR_LDX;
+  ImmToIdxMap[PPC::SPILLTOVSR_ST] = PPC::SPILLTOVSR_STX;
   ImmToIdxMap[PPC::DFSTOREf32] = PPC::STXSSPX;
   ImmToIdxMap[PPC::DFSTOREf64] = PPC::STXSDX;
   ImmToIdxMap[PPC::LXV] = PPC::LXVX;
@@ -328,6 +338,18 @@ PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
 
   // With VSX, we can inflate various sub-register classes to the full VSX
   // register set.
+  // For Power9 we allow the user to enable GPR to vector spills.
+  // FIXME: Currently limited to spilling GP8RC. A follow on patch will add
+  // support to spill GPRC.
+  if (TM.isELFv2ABI()) {
+    if (Subtarget.hasP9Vector() && EnableGPRToVecSpills &&
+        RC == &PPC::G8RCRegClass) {
+      InflateGP8RC++;
+      return &PPC::SPILLTOVSRRCRegClass;
+    }
+    if (RC == &PPC::GPRCRegClass && EnableGPRToVecSpills)
+      InflateGPRC++;
+  }
   if (RC == &PPC::F8RCRegClass)
     return &PPC::VSFRCRegClass;
   else if (RC == &PPC::VRRCRegClass)
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 896cec7e4f6..f7807907bd6 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -305,6 +305,11 @@ def VFRC : RegisterClass<"PPC", [f64], 64,
                           VF22, VF21, VF20)>;
 def VSFRC : RegisterClass<"PPC", [f64], 64, (add F8RC, VFRC)>;
 
+// Allow spilling GPR's into caller-saved VSR's.
+def SPILLTOVSRRC : RegisterClass<"PPC", [i64, f64], 64, (add G8RC, (sub VSFRC,
+                                (sequence "VF%u", 31, 20),
+                                (sequence "F%u", 31, 14)))>;
+
 // Register class for single precision scalars in VSX registers
 def VSSRC : RegisterClass<"PPC", [f32], 32, (add VSFRC)>;
 
diff --git a/test/CodeGen/PowerPC/gpr-vsr-spill.ll b/test/CodeGen/PowerPC/gpr-vsr-spill.ll
new file mode 100644
index 00000000000..0b1781a0646
--- /dev/null
+++ b/test/CodeGen/PowerPC/gpr-vsr-spill.ll
@@ -0,0 +1,24 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-enable-gpr-to-vsr-spills < %s | FileCheck %s
+define signext i32 @foo(i32 signext %a, i32 signext %b) {
+entry:
+  %cmp = icmp slt i32 %a, %b
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29}"(i32 %a, i32 %b)
+  %mul = mul nsw i32 %0, %a
+  %add = add i32 %b, %a
+  %tmp = add i32 %add, %mul
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  %e.0 = phi i32 [ %tmp, %if.then ], [ undef, %entry ]
+  ret i32 %e.0
+; CHECK: @foo
+; CHECK: mr [[NEWREG:[0-9]+]], 3
+; CHECK: mtvsrd [[NEWREG2:[0-9]+]], 4
+; CHECK: mffprd [[REG1:[0-9]+]], [[NEWREG2]]
+; CHECK: add {{[0-9]+}}, [[NEWREG]], [[REG1]]
+; CHECK: mffprd [[REG2:[0-9]+]], [[NEWREG2]]
+; CHECK: add {{[0-9]+}}, [[REG2]], [[NEWREG]]
+}
-- 
2.49.0