return ScratchRsrcReg;
}
-unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
+// Shift down registers reserved for the scratch wave offset and stack pointer
+// SGPRs.
+std::pair<unsigned, unsigned>
+SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
const SISubtarget &ST,
const SIInstrInfo *TII,
const SIRegisterInfo *TRI,
// No replacement necessary.
if (ScratchWaveOffsetReg == AMDGPU::NoRegister ||
- !MRI.isPhysRegUsed(ScratchWaveOffsetReg))
- return AMDGPU::NoRegister;
+ !MRI.isPhysRegUsed(ScratchWaveOffsetReg)) {
+ assert(MFI->getStackPtrOffsetReg() == AMDGPU::NoRegister);
+ return std::make_pair(AMDGPU::NoRegister, AMDGPU::NoRegister);
+ }
- if (ST.hasSGPRInitBug() ||
- ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF))
- return ScratchWaveOffsetReg;
+ unsigned SPReg = MFI->getStackPtrOffsetReg();
+ if (ST.hasSGPRInitBug())
+ return std::make_pair(ScratchWaveOffsetReg, SPReg);
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
if (NumPreloaded > AllSGPRs.size())
- return ScratchWaveOffsetReg;
+ return std::make_pair(ScratchWaveOffsetReg, SPReg);
AllSGPRs = AllSGPRs.slice(NumPreloaded);
// register from the list to consider, it means that when this
// register is being used for the scratch wave offset and there
// are no other free SGPRs, then the value will stay in this register.
+ // + 1 if stack pointer is used.
// ----
- // 13
- if (AllSGPRs.size() < 13)
- return ScratchWaveOffsetReg;
+ // 13 (+1)
+ unsigned ReservedRegCount = 13;
+ if (SPReg != AMDGPU::NoRegister)
+ ++ReservedRegCount;
+
+ if (AllSGPRs.size() < ReservedRegCount)
+ return std::make_pair(ScratchWaveOffsetReg, SPReg);
- for (MCPhysReg Reg : AllSGPRs.drop_back(13)) {
+ bool HandledScratchWaveOffsetReg =
+ ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
+
+ for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) {
// Pick the first unallocated SGPR. Be careful not to pick an alias of the
// scratch descriptor, since we haven’t added its uses yet.
if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
- MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
- MFI->setScratchWaveOffsetReg(Reg);
- return Reg;
+ if (!HandledScratchWaveOffsetReg) {
+ HandledScratchWaveOffsetReg = true;
+
+ MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
+ MFI->setScratchWaveOffsetReg(Reg);
+ ScratchWaveOffsetReg = Reg;
+ } else {
+ if (SPReg == AMDGPU::NoRegister)
+ break;
+
+ MRI.replaceRegWith(SPReg, Reg);
+ MFI->setStackPtrOffsetReg(Reg);
+ SPReg = Reg;
+ break;
+ }
}
}
- return ScratchWaveOffsetReg;
+ return std::make_pair(ScratchWaveOffsetReg, SPReg);
}
void SIFrameLowering::emitPrologue(MachineFunction &MF,
if (MF.getFrameInfo().hasStackObjects() && MFI->hasFlatScratchInit())
emitFlatScratchInit(ST, MF, MBB);
+ unsigned SPReg = MFI->getStackPtrOffsetReg();
+ if (SPReg != AMDGPU::NoRegister) {
+ DebugLoc DL;
+ int64_t StackSize = MF.getFrameInfo().getStackSize();
+
+ if (StackSize == 0) {
+ BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::COPY), SPReg)
+ .addReg(MFI->getScratchWaveOffsetReg());
+ } else {
+ BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::S_ADD_U32), SPReg)
+ .addReg(MFI->getScratchWaveOffsetReg())
+ .addImm(StackSize * ST.getWavefrontSize());
+ }
+ }
+
unsigned ScratchRsrcReg
= getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);
- unsigned ScratchWaveOffsetReg
+
+ unsigned ScratchWaveOffsetReg;
+ std::tie(ScratchWaveOffsetReg, SPReg)
= getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
// It's possible to have uses of only ScratchWaveOffsetReg without