From c6533e305bcdecf3957e5c840444d5959c6de1b1 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 7 Nov 2016 16:39:22 +0000 Subject: [PATCH] AMDGPU: Refactor copyPhysReg Separate the subregister splitting logic to re-use later. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@286118 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/SIInstrInfo.cpp | 126 ++++++--------------------- lib/Target/AMDGPU/SIRegisterInfo.cpp | 103 ++++++++++++++++++++++ lib/Target/AMDGPU/SIRegisterInfo.h | 3 + 3 files changed, 133 insertions(+), 99 deletions(-) diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index 6211dc30263..94b484ed0b3 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -349,51 +349,17 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { + const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg); - static const int16_t Sub0_15[] = { - AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, - AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, - AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, - AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, - }; - - static const int16_t Sub0_15_64[] = { - AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, - AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, - AMDGPU::sub8_sub9, AMDGPU::sub10_sub11, - AMDGPU::sub12_sub13, AMDGPU::sub14_sub15, - }; - - static const int16_t Sub0_7[] = { - AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, - AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, - }; - - static const int16_t Sub0_7_64[] = { - AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, - AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, - }; - - static const int16_t Sub0_3[] = { - AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, - }; - - static const int16_t Sub0_3_64[] = { - AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, - }; - - static const 
int16_t Sub0_2[] = { - AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, - }; - - static const int16_t Sub0_1[] = { - AMDGPU::sub0, AMDGPU::sub1, - }; - - unsigned Opcode; - ArrayRef<int16_t> SubIndices; + if (RC == &AMDGPU::VGPR_32RegClass) { + assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) || + AMDGPU::SReg_32RegClass.contains(SrcReg)); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } - if (AMDGPU::SReg_32RegClass.contains(DestReg)) { + if (RC == &AMDGPU::SReg_32RegClass) { if (SrcReg == AMDGPU::SCC) { BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg) .addImm(-1) @@ -405,8 +371,9 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); return; + } - } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) { + if (RC == &AMDGPU::SReg_64RegClass) { if (DestReg == AMDGPU::VCC) { if (AMDGPU::SReg_64RegClass.contains(SrcReg)) { BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC) @@ -426,68 +393,29 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); return; + } - } else if (DestReg == AMDGPU::SCC) { + if (DestReg == AMDGPU::SCC) { assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U32)) - .addReg(SrcReg, getKillRegState(KillSrc)) - .addImm(0); - return; - } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) { - assert(AMDGPU::SReg_128RegClass.contains(SrcReg)); - Opcode = AMDGPU::S_MOV_B64; - SubIndices = Sub0_3_64; - - } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) { - assert(AMDGPU::SReg_256RegClass.contains(SrcReg)); - Opcode = AMDGPU::S_MOV_B64; - SubIndices = Sub0_7_64; - - } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) { - assert(AMDGPU::SReg_512RegClass.contains(SrcReg)); - Opcode = AMDGPU::S_MOV_B64; - SubIndices = Sub0_15_64; - - } else if 
(AMDGPU::VGPR_32RegClass.contains(DestReg)) { - assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) || - AMDGPU::SReg_32RegClass.contains(SrcReg)); - BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)) + .addImm(0); return; + } - } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) { - assert(AMDGPU::VReg_64RegClass.contains(SrcReg) || - AMDGPU::SReg_64RegClass.contains(SrcReg)); - Opcode = AMDGPU::V_MOV_B32_e32; - SubIndices = Sub0_1; - - } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) { - assert(AMDGPU::VReg_96RegClass.contains(SrcReg)); - Opcode = AMDGPU::V_MOV_B32_e32; - SubIndices = Sub0_2; - - } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) { - assert(AMDGPU::VReg_128RegClass.contains(SrcReg) || - AMDGPU::SReg_128RegClass.contains(SrcReg)); - Opcode = AMDGPU::V_MOV_B32_e32; - SubIndices = Sub0_3; - - } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) { - assert(AMDGPU::VReg_256RegClass.contains(SrcReg) || - AMDGPU::SReg_256RegClass.contains(SrcReg)); - Opcode = AMDGPU::V_MOV_B32_e32; - SubIndices = Sub0_7; - - } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) { - assert(AMDGPU::VReg_512RegClass.contains(SrcReg) || - AMDGPU::SReg_512RegClass.contains(SrcReg)); - Opcode = AMDGPU::V_MOV_B32_e32; - SubIndices = Sub0_15; - - } else { - llvm_unreachable("Can't copy register!"); + unsigned EltSize = 4; + unsigned Opcode = AMDGPU::V_MOV_B32_e32; + if (RI.isSGPRClass(RC)) { + if (RC->getSize() > 4) { + Opcode = AMDGPU::S_MOV_B64; + EltSize = 8; + } else { + Opcode = AMDGPU::S_MOV_B32; + EltSize = 4; + } } + ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize); bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg); for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) { diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index e85ee719e30..e975f5a5341 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp 
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1168,3 +1168,106 @@ unsigned SIRegisterInfo::getMaxNumVGPRs(const MachineFunction &MF) const { return MaxNumVGPRs - getNumDebuggerReservedVGPRs(ST); } + +ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC, + unsigned EltSize) const { + if (EltSize == 4) { + static const int16_t Sub0_15[] = { + AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, + AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, + AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, + AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, + }; + + static const int16_t Sub0_7[] = { + AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, + AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, + }; + + static const int16_t Sub0_3[] = { + AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, + }; + + static const int16_t Sub0_2[] = { + AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, + }; + + static const int16_t Sub0_1[] = { + AMDGPU::sub0, AMDGPU::sub1, + }; + + switch (AMDGPU::getRegBitWidth(*RC->MC)) { + case 32: + return {}; + case 64: + return Sub0_1; + case 96: + return Sub0_2; + case 128: + return Sub0_3; + case 256: + return Sub0_7; + case 512: + return Sub0_15; + default: + llvm_unreachable("unhandled register size"); + } + } + + if (EltSize == 8) { + static const int16_t Sub0_15_64[] = { + AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, + AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, + AMDGPU::sub8_sub9, AMDGPU::sub10_sub11, + AMDGPU::sub12_sub13, AMDGPU::sub14_sub15 + }; + + static const int16_t Sub0_7_64[] = { + AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, + AMDGPU::sub4_sub5, AMDGPU::sub6_sub7 + }; + + + static const int16_t Sub0_3_64[] = { + AMDGPU::sub0_sub1, AMDGPU::sub2_sub3 + }; + + switch (AMDGPU::getRegBitWidth(*RC->MC)) { + case 64: + return {}; + case 128: + return Sub0_3_64; + case 256: + return Sub0_7_64; + case 512: + return Sub0_15_64; + default: + llvm_unreachable("unhandled register size"); + } + } + + 
assert(EltSize == 16 && "unhandled register spill split size"); + + static const int16_t Sub0_15_128[] = { + AMDGPU::sub0_sub1_sub2_sub3, + AMDGPU::sub4_sub5_sub6_sub7, + AMDGPU::sub8_sub9_sub10_sub11, + AMDGPU::sub12_sub13_sub14_sub15 + }; + + static const int16_t Sub0_7_128[] = { + AMDGPU::sub0_sub1_sub2_sub3, + AMDGPU::sub4_sub5_sub6_sub7 + }; + + switch (AMDGPU::getRegBitWidth(*RC->MC)) { + case 128: + return {}; + case 256: + return Sub0_7_128; + case 512: + return Sub0_15_128; + default: + llvm_unreachable("unhandled register size"); + } +} diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h index 031369808b7..4a1f8640ad7 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/lib/Target/AMDGPU/SIRegisterInfo.h @@ -245,6 +245,9 @@ public: /// unit requirement. unsigned getMaxNumVGPRs(const MachineFunction &MF) const; + ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC, + unsigned EltSize) const; + private: void buildSpillLoadStore(MachineBasicBlock::iterator MI, unsigned LoadStoreOp, const MachineOperand *SrcDst, -- 2.50.1