From: Matthias Braun Date: Fri, 5 May 2017 22:04:05 +0000 (+0000) Subject: ARM: Compute MaxCallFrame size early X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=97beda06264a6d17821fe7901187bb663ae33709;p=llvm ARM: Compute MaxCallFrame size early This exposes a method in MachineFrameInfo that calculates MaxCallFrameSize and calls it after instruction selection in the ARM target. This avoids ARMBaseRegisterInfo::canRealignStack()/ARMFrameLowering::hasReservedCallFrame() giving different answers in early/late phases of codegen. The testcase shows a particular nasty example result of that where we would fail to properly align an alloca. Differential Revision: https://reviews.llvm.org/D32622 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@302303 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h index 61be9f775c9..689f3cd9fd1 100644 --- a/include/llvm/CodeGen/MachineFrameInfo.h +++ b/include/llvm/CodeGen/MachineFrameInfo.h @@ -520,6 +520,14 @@ public: bool hasTailCall() const { return HasTailCall; } void setHasTailCall() { HasTailCall = true; } + /// Computes the maximum size of a callframe and the AdjustsStack property. + /// This only works for targets defining + /// TargetInstrInfo::getCallFrameSetupOpcode(), getCallFrameDestroyOpcode(), + /// and getFrameSize(). + /// This is usually computed by the prologue epilogue inserter but some + /// targets may call this to compute it earlier. + void computeMaxCallFrameSize(const MachineFunction &MF); + /// Return the maximum size of a call frame that must be /// allocated for an outgoing function call. This is only available if /// CallFrameSetup/Destroy pseudo instructions are used by the target, and diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index f12048103f7..811858f136e 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1207,9 +1207,6 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { finishPendingPhis(); - auto &TLI = *MF->getSubtarget().getTargetLowering(); - TLI.finalizeLowering(*MF); - // Merge the argument lowering and constants block with its single // successor, the LLVM-IR entry block. We want the basic block to // be maximal. diff --git a/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/lib/CodeGen/GlobalISel/InstructionSelect.cpp index cf97c635e79..a16e14fe2db 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -24,6 +24,7 @@ #include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" #define DEBUG_TYPE "instruction-select" @@ -70,8 +71,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { // An optimization remark emitter. Used to report failures. MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr); - // FIXME: freezeReservedRegs is now done in IRTranslator, but there are many - // other MF/MFI fields we need to initialize. + // FIXME: There are many other MF/MFI fields we need to initialize. #ifndef NDEBUG // Check that our input is fully legal: we require the function to have the @@ -184,6 +184,9 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { return false; } + auto &TLI = *MF.getSubtarget().getTargetLowering(); + TLI.finalizeLowering(MF); + // FIXME: Should we accurately track changes? return true; } diff --git a/lib/CodeGen/MachineFrameInfo.cpp b/lib/CodeGen/MachineFrameInfo.cpp index 7de8434df80..73d778ff302 100644 --- a/lib/CodeGen/MachineFrameInfo.cpp +++ b/lib/CodeGen/MachineFrameInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include @@ -175,6 +176,31 @@ unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { return (unsigned)Offset; } +void MachineFrameInfo::computeMaxCallFrameSize(const MachineFunction &MF) { + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode(); + unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); + assert(FrameSetupOpcode != ~0u && FrameDestroyOpcode != ~0u && + "Can only compute MaxCallFrameSize if Setup/Destroy opcode are known"); + + MaxCallFrameSize = 0; + for (const MachineBasicBlock &MBB : MF) { + for (const MachineInstr &MI : MBB) { + unsigned Opcode = MI.getOpcode(); + if (Opcode == FrameSetupOpcode || Opcode == FrameDestroyOpcode) { + unsigned Size = TII.getFrameSize(MI); + MaxCallFrameSize = std::max(MaxCallFrameSize, Size); + AdjustsStack = true; + } else if (MI.isInlineAsm()) { + // Some inline asm's need a stack frame, as indicated by operand 1. + unsigned ExtraInfo = MI.getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + if (ExtraInfo & InlineAsm::Extra_IsAlignStack) + AdjustsStack = true; + } + } + } +} + void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ if (Objects.empty()) return; diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 84bd670105e..bfb2cde030d 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -188,8 +188,9 @@ namespace { return Reg < regsReserved.size() && regsReserved.test(Reg); } - bool isAllocatable(unsigned Reg) { - return Reg < TRI->getNumRegs() && MRI->isAllocatable(Reg); + bool isAllocatable(unsigned Reg) const { + return Reg < TRI->getNumRegs() && TRI->isInAllocatableClass(Reg) && + !regsReserved.test(Reg); } // Analysis information if available @@ -526,7 +527,8 @@ void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { void MachineVerifier::visitMachineFunctionBefore() { lastIndex = SlotIndex(); - regsReserved = MRI->getReservedRegs(); + regsReserved = MRI->reservedRegsFrozen() ? MRI->getReservedRegs() + : TRI->getReservedRegs(*MF); if (!MF->empty()) markReachable(&MF->front()); diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 549f07ecd9c..d2afeae9e70 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -277,6 +277,9 @@ void PEI::calculateCallFrameInfo(MachineFunction &Fn) { AdjustsStack = true; } + assert(!MFI.isMaxCallFrameSizeComputed() || + (MFI.getMaxCallFrameSize() == MaxCallFrameSize && + MFI.adjustsStack() == AdjustsStack)); MFI.setAdjustsStack(AdjustsStack); MFI.setMaxCallFrameSize(MaxCallFrameSize); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index a20887564f4..b18ed509ed2 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -245,11 +245,18 @@ ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, switch (RC->getID()) { default: return 0; - case ARM::tGPRRegClassID: - return TFI->hasFP(MF) ? 4 : 5; + case ARM::tGPRRegClassID: { + // hasFP ends up calling getMaxCallFrameComputed() which may not be + // available when getPressureLimit() is called as part of + // ScheduleDAGRRList. + bool HasFP = MF.getFrameInfo().isMaxCallFrameSizeComputed() + ? TFI->hasFP(MF) : true; + return 5 - HasFP; + } case ARM::GPRRegClassID: { - unsigned FP = TFI->hasFP(MF) ? 1 : 0; - return 10 - FP - (STI.isR9Reserved() ? 1 : 0); + bool HasFP = MF.getFrameInfo().isMaxCallFrameSizeComputed() + ? TFI->hasFP(MF) : true; + return 10 - HasFP - (STI.isR9Reserved() ? 1 : 0); } case ARM::SPRRegClassID: // Currently not used as 'rep' register class. case ARM::DPRRegClassID: diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index f248a829b8e..bbf5b6665e6 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -14054,3 +14054,8 @@ void ARMTargetLowering::insertCopiesSplitCSR( .addReg(NewVR); } } + +void ARMTargetLowering::finalizeLowering(MachineFunction &MF) const { + MF.getFrameInfo().computeMaxCallFrameSize(MF); + TargetLoweringBase::finalizeLowering(MF); +} diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index e0db4c4bf82..08c51b66dfe 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -544,6 +544,8 @@ class InstrItineraryData; unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const; + void finalizeLowering(MachineFunction &MF) const override; + protected: std::pair findRepresentativeClass(const TargetRegisterInfo *TRI, diff --git a/test/CodeGen/ARM/alloca-align.ll b/test/CodeGen/ARM/alloca-align.ll new file mode 100644 index 00000000000..3bba156f0ee --- /dev/null +++ b/test/CodeGen/ARM/alloca-align.ll @@ -0,0 +1,24 @@ +; RUN: llc -o - %s | FileCheck %s +target triple="arm--" + +@glob = external global i32* + +declare void @bar(i32*, [20000 x i8]* byval) + +; CHECK-LABEL: foo: +; We should see the stack getting additional alignment +; CHECK: sub sp, sp, #16 +; CHECK: bic sp, sp, #31 +; And a base pointer getting used. +; CHECK: mov r6, sp +; Which is passed to the call +; CHECK: add [[REG:r[0-9]+]], r6, #19456 +; CHECK: add r0, [[REG]], #536 +; CHECK: bl bar +define void @foo([20000 x i8]* %addr) { + %tmp = alloca [4 x i32], align 32 + %tmp0 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0 + call void @bar(i32* %tmp0, [20000 x i8]* byval %addr) + ret void +} +