cl::Hidden, cl::desc("Enable long calls for save-restore stubs."),
cl::init(false), cl::ZeroOrMore);
-static cl::opt<bool> UseAllocframe("use-allocframe", cl::init(true),
- cl::Hidden, cl::desc("Use allocframe more conservatively"));
+static cl::opt<bool> EliminateFramePointer("hexagon-fp-elim", cl::init(true),
+ cl::Hidden, cl::desc("Refrain from using FP whenever possible"));
static cl::opt<bool> OptimizeSpillSlots("hexagon-opt-spill", cl::Hidden,
cl::init(true), cl::desc("Optimize spill slots"));
auto &HST = MF.getSubtarget<HexagonSubtarget>();
auto &HII = *HST.getInstrInfo();
auto &HRI = *HST.getRegisterInfo();
- DebugLoc dl;
unsigned MaxAlign = std::max(MFI.getMaxAlignment(), getStackAlignment());
MI->eraseFromParent();
}
- if (!hasFP(MF))
- return;
-
- // Check for overflow.
- // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
- const unsigned int ALLOCFRAME_MAX = 16384;
+ DebugLoc dl = MBB.findDebugLoc(InsertPt);
- // Create a dummy memory operand to avoid allocframe from being treated as
- // a volatile memory reference.
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
- 4, 4);
-
- if (NumBytes >= ALLOCFRAME_MAX) {
- // Emit allocframe(#0).
- BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
- .addImm(0)
- .addMemOperand(MMO);
-
- // Subtract offset from frame pointer.
- // We use a caller-saved non-parameter register for that.
- unsigned CallerSavedReg = HRI.getFirstCallerSavedNonParamReg();
- BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::CONST32),
- CallerSavedReg).addImm(NumBytes);
- BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_sub), SP)
+ if (hasFP(MF)) {
+ insertAllocframe(MBB, InsertPt, NumBytes);
+ if (AlignStack) {
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP)
+ .addReg(SP)
+ .addImm(-int64_t(MaxAlign));
+ }
+ // If the stack-checking is enabled, and we spilled the callee-saved
+ // registers inline (i.e. did not use a spill function), then call
+ // the stack checker directly.
+ if (EnableStackOVFSanitizer && !PrologueStubs)
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::PS_call_stk))
+ .addExternalSymbol("__runtime_stack_check");
+ } else if (NumBytes > 0) {
+ assert(alignTo(NumBytes, 8) == NumBytes);
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
.addReg(SP)
- .addReg(CallerSavedReg);
- } else {
- BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
- .addImm(NumBytes)
- .addMemOperand(MMO);
+ .addImm(-int(NumBytes));
}
-
- if (AlignStack) {
- BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP)
- .addReg(SP)
- .addImm(-int64_t(MaxAlign));
- }
-
- // If the stack-checking is enabled, and we spilled the callee-saved
- // registers inline (i.e. did not use a spill function), then call
- // the stack checker directly.
- if (EnableStackOVFSanitizer && !PrologueStubs)
- BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::PS_call_stk))
- .addExternalSymbol("__runtime_stack_check");
}
void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
MachineFunction &MF = *MBB.getParent();
- if (!hasFP(MF))
- return;
-
auto &HST = MF.getSubtarget<HexagonSubtarget>();
auto &HII = *HST.getInstrInfo();
auto &HRI = *HST.getRegisterInfo();
unsigned SP = HRI.getStackRegister();
+ MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator();
+ DebugLoc dl = MBB.findDebugLoc(InsertPt);
+
+ if (!hasFP(MF)) {
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (unsigned NumBytes = MFI.getStackSize()) {
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
+ .addReg(SP)
+ .addImm(NumBytes);
+ }
+ return;
+ }
+
MachineInstr *RetI = getReturn(MBB);
unsigned RetOpc = RetI ? RetI->getOpcode() : 0;
- MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator();
- DebugLoc DL;
- if (InsertPt != MBB.end())
- DL = InsertPt->getDebugLoc();
- else if (!MBB.empty())
- DL = std::prev(MBB.end())->getDebugLoc();
-
// Handle EH_RETURN.
if (RetOpc == Hexagon::EH_RETURN_JMPR) {
- BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe));
- BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::A2_add), SP)
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe));
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_add), SP)
.addReg(SP)
.addReg(Hexagon::R28);
return;
// otherwise just add deallocframe. The function could be returning via a
// tail call.
if (RetOpc != Hexagon::PS_jmpret || DisableDeallocRet) {
- BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe));
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe));
return;
}
unsigned NewOpc = Hexagon::L4_return;
- MachineInstr *NewI = BuildMI(MBB, RetI, DL, HII.get(NewOpc));
+ MachineInstr *NewI = BuildMI(MBB, RetI, dl, HII.get(NewOpc));
// Transfer the function live-out registers.
NewI->copyImplicitOps(MF, *RetI);
MBB.erase(RetI);
}
+void HexagonFrameLowering::insertAllocframe(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertPt, unsigned NumBytes) const {
+ MachineFunction &MF = *MBB.getParent();
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ auto &HII = *HST.getInstrInfo();
+ auto &HRI = *HST.getRegisterInfo();
+
+ // Frame sizes of ALLOCFRAME_MAX bytes or more take the two-step path below.
+ // Hexagon_TODO: The limit is hard-coded; is there an API that provides it?
+ const unsigned int ALLOCFRAME_MAX = 16384;
+
+ // Attach a dummy memory operand so that allocframe is not treated as
+ // a volatile memory reference.
+ auto *MMO = MF.getMachineMemOperand(MachinePointerInfo::getStack(MF, 0),
+ MachineMemOperand::MOStore, 4, 4);
+
+ DebugLoc dl = MBB.findDebugLoc(InsertPt);
+
+ if (NumBytes >= ALLOCFRAME_MAX) {
+ // Emit allocframe with a zero immediate (allocframe(#0)).
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
+ .addImm(0)
+ .addMemOperand(MMO);
+
+ // Then lower the stack pointer by NumBytes with a separate A2_addi.
+ unsigned SP = HRI.getStackRegister();
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
+ .addReg(SP)
+ .addImm(-int(NumBytes));
+ } else {
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
+ .addImm(NumBytes)
+ .addMemOperand(MMO);
+ }
+}
+
void HexagonFrameLowering::updateEntryPaths(MachineFunction &MF,
MachineBasicBlock &SaveB) const {
SetVector<unsigned> Worklist;
}
bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
+ if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
+ return false;
+
auto &MFI = MF.getFrameInfo();
auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
-
- bool HasFixed = MFI.getNumFixedObjects();
- bool HasPrealloc = const_cast<MachineFrameInfo&>(MFI)
- .getLocalFrameObjectCount();
bool HasExtraAlign = HRI.needsStackRealignment(MF);
bool HasAlloca = MFI.hasVarSizedObjects();
// By default we want to use SP (since it's always there). FP requires
// some setup (i.e. ALLOCFRAME).
- // Fixed and preallocated objects need FP if the distance from them to
- // the SP is unknown (as is with alloca or aligna).
- if ((HasFixed || HasPrealloc) && (HasAlloca || HasExtraAlign))
+ // Both alloca and stack alignment modify the stack pointer by an
+ // undetermined value, so we need to save it at the entry to the function
+ // (i.e. use allocframe).
+ if (HasAlloca || HasExtraAlign)
return true;
if (MFI.getStackSize() > 0) {
- if (EnableStackOVFSanitizer || UseAllocframe)
+ // If FP-elimination is disabled, we have to use FP at this point.
+ const TargetMachine &TM = MF.getTarget();
+ if (TM.Options.DisableFramePointerElim(MF) || !EliminateFramePointer)
+ return true;
+ if (EnableStackOVFSanitizer)
return true;
}
- if (MFI.hasCalls() ||
- MF.getInfo<HexagonMachineFunctionInfo>()->hasClobberLR())
+ const auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
+ if (MFI.hasCalls() || HMFI.hasClobberLR())
+ return true;
+
+ // Frame pointer elimination is a possibility at this point, but
+ // to know if FP is necessary we need to know if spill/restore
+ // functions will be used (they require FP to be valid).
+ // This means that hasFP shouldn't really be called before CSI is
+ // calculated, and some measures are taken to make sure of that
+ // (e.g. default implementations of virtual functions that call it
+ // are overridden appropriately).
+ assert(MFI.isCalleeSavedInfoValid() && "Need to know CSI");
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ if (useSpillFunction(MF, CSI) || useRestoreFunction(MF, CSI))
return true;
return false;
bool HasExtraAlign = HRI.needsStackRealignment(MF);
bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None;
- unsigned FrameSize = MFI.getStackSize();
- unsigned SP = HRI.getStackRegister(), FP = HRI.getFrameRegister();
auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
+ unsigned FrameSize = MFI.getStackSize();
+ unsigned SP = HRI.getStackRegister();
+ unsigned FP = HRI.getFrameRegister();
unsigned AP = HMFI.getStackAlignBasePhysReg();
// It may happen that AP will be absent even HasAlloca && HasExtraAlign
// is true. HasExtraAlign may be set because of vector spills, without
// there will be no SP -= FrameSize), so the frame size should not be
// added to the calculated offset.
int RealOffset = Offset;
- if (!UseFP && !UseAP && HasFP)
+ if (!UseFP && !UseAP)
RealOffset = FrameSize+Offset;
return RealOffset;
}
/// be generated via inline code. If this function returns "true", inline
/// code will be generated. If this function returns "false", additional
/// checks are performed, which may still lead to the inline code.
-bool HexagonFrameLowering::shouldInlineCSR(MachineFunction &MF,
+bool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF,
const CSIVect &CSI) const {
if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())
return true;
return false;
}
-bool HexagonFrameLowering::useSpillFunction(MachineFunction &MF,
+bool HexagonFrameLowering::useSpillFunction(const MachineFunction &MF,
const CSIVect &CSI) const {
if (shouldInlineCSR(MF, CSI))
return false;
return Threshold < NumCSI;
}
-bool HexagonFrameLowering::useRestoreFunction(MachineFunction &MF,
+bool HexagonFrameLowering::useRestoreFunction(const MachineFunction &MF,
const CSIVect &CSI) const {
if (shouldInlineCSR(MF, CSI))
return false;
SDValue
HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
- SDNode *Node = Op.getNode();
MachineFunction &MF = DAG.getMachineFunction();
- auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
- switch (Node->getOpcode()) {
- case ISD::INLINEASM: {
- unsigned NumOps = Node->getNumOperands();
- if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
- --NumOps; // Ignore the flag operand.
-
- for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
- if (FuncInfo.hasClobberLR())
- break;
- unsigned Flags =
- cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
- unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
- ++i; // Skip the ID value.
-
- switch (InlineAsm::getKind(Flags)) {
- default: llvm_unreachable("Bad flags!");
- case InlineAsm::Kind_RegDef:
- case InlineAsm::Kind_RegUse:
- case InlineAsm::Kind_Imm:
- case InlineAsm::Kind_Clobber:
- case InlineAsm::Kind_Mem: {
- for (; NumVals; --NumVals, ++i) {}
- break;
- }
- case InlineAsm::Kind_RegDefEarlyClobber: {
- for (; NumVals; --NumVals, ++i) {
- unsigned Reg =
- cast<RegisterSDNode>(Node->getOperand(i))->getReg();
-
- // Check it to be lr
- const HexagonRegisterInfo *QRI = Subtarget.getRegisterInfo();
- if (Reg == QRI->getRARegister()) {
- FuncInfo.setHasClobberLR(true);
- break;
- }
- }
- break;
- }
+ auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
+ const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
+ unsigned LR = HRI.getRARegister();
+
+ if (Op.getOpcode() != ISD::INLINEASM || HMFI.hasClobberLR())
+ return Op;
+
+ unsigned NumOps = Op.getNumOperands();
+ if (Op.getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Exclude the trailing glue operand from the scan.
+
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ ++i; // Step past the flags word that was just decoded.
+
+ switch (InlineAsm::getKind(Flags)) {
+ default:
+ llvm_unreachable("Bad flags!");
+ case InlineAsm::Kind_RegUse:
+ case InlineAsm::Kind_Imm:
+ case InlineAsm::Kind_Mem:
+ i += NumVals;
+ break;
+ case InlineAsm::Kind_Clobber:
+ case InlineAsm::Kind_RegDef:
+ case InlineAsm::Kind_RegDefEarlyClobber: {
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
+ if (Reg != LR)
+ continue;
+ HMFI.setHasClobberLR(true);
+ return Op;
}
+ break;
}
}
- } // Node->getOpcode
+ }
+
return Op;
}
--- /dev/null
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+target triple = "hexagon"
+
+; FP elimination enabled.
+;
+; CHECK-LABEL: danny:
+; CHECK: r29 = add(r29,#-[[SIZE:[0-9]+]])
+; CHECK: r29 = add(r29,#[[SIZE]])
+define i32 @danny(i32 %a0, i32 %a1) local_unnamed_addr #0 {
+b2:
+ %v3 = alloca [32 x i32], align 8
+ %v4 = bitcast [32 x i32]* %v3 to i8*
+ call void @llvm.lifetime.start.p0i8(i64 128, i8* nonnull %v4) #3
+ br label %b5
+
+b5: ; preds = %b5, %b2
+ %v6 = phi i32 [ 0, %b2 ], [ %v8, %b5 ]
+ %v7 = getelementptr inbounds [32 x i32], [32 x i32]* %v3, i32 0, i32 %v6
+ store i32 %v6, i32* %v7, align 4
+ %v8 = add nuw nsw i32 %v6, 1
+ %v9 = icmp eq i32 %v8, 32
+ br i1 %v9, label %b10, label %b5
+
+b10: ; preds = %b5
+ %v11 = getelementptr inbounds [32 x i32], [32 x i32]* %v3, i32 0, i32 %a0
+ store i32 %a1, i32* %v11, align 4
+ br label %b12
+
+b12: ; preds = %b12, %b10
+ %v13 = phi i32 [ 0, %b10 ], [ %v18, %b12 ]
+ %v14 = phi i32 [ 0, %b10 ], [ %v17, %b12 ]
+ %v15 = getelementptr inbounds [32 x i32], [32 x i32]* %v3, i32 0, i32 %v13
+ %v16 = load i32, i32* %v15, align 4
+ %v17 = add nsw i32 %v16, %v14
+ %v18 = add nuw nsw i32 %v13, 1
+ %v19 = icmp eq i32 %v18, 32
+ br i1 %v19, label %b20, label %b12
+
+b20: ; preds = %b12
+ call void @llvm.lifetime.end.p0i8(i64 128, i8* nonnull %v4) #3
+ ret i32 %v17
+}
+
+; FP elimination disabled.
+;
+; CHECK-LABEL: sammy:
+; CHECK: allocframe
+; CHECK: dealloc_return
+define i32 @sammy(i32 %a0, i32 %a1) local_unnamed_addr #1 {
+b2:
+ %v3 = alloca [32 x i32], align 8
+ %v4 = bitcast [32 x i32]* %v3 to i8*
+ call void @llvm.lifetime.start.p0i8(i64 128, i8* nonnull %v4) #3
+ br label %b5
+
+b5: ; preds = %b5, %b2
+ %v6 = phi i32 [ 0, %b2 ], [ %v8, %b5 ]
+ %v7 = getelementptr inbounds [32 x i32], [32 x i32]* %v3, i32 0, i32 %v6
+ store i32 %v6, i32* %v7, align 4
+ %v8 = add nuw nsw i32 %v6, 1
+ %v9 = icmp eq i32 %v8, 32
+ br i1 %v9, label %b10, label %b5
+
+b10: ; preds = %b5
+ %v11 = getelementptr inbounds [32 x i32], [32 x i32]* %v3, i32 0, i32 %a0
+ store i32 %a1, i32* %v11, align 4
+ br label %b12
+
+b12: ; preds = %b12, %b10
+ %v13 = phi i32 [ 0, %b10 ], [ %v18, %b12 ]
+ %v14 = phi i32 [ 0, %b10 ], [ %v17, %b12 ]
+ %v15 = getelementptr inbounds [32 x i32], [32 x i32]* %v3, i32 0, i32 %v13
+ %v16 = load i32, i32* %v15, align 4
+ %v17 = add nsw i32 %v16, %v14
+ %v18 = add nuw nsw i32 %v13, 1
+ %v19 = icmp eq i32 %v18, 32
+ br i1 %v19, label %b20, label %b12
+
+b20: ; preds = %b12
+ call void @llvm.lifetime.end.p0i8(i64 128, i8* nonnull %v4) #3
+ ret i32 %v17
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #2
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #2
+
+attributes #0 = { nounwind readnone "no-frame-pointer-elim"="false" "target-cpu"="hexagonv60" }
+attributes #1 = { nounwind readnone "no-frame-pointer-elim"="true" "target-cpu"="hexagonv60" }
+attributes #2 = { argmemonly nounwind }
+attributes #3 = { nounwind }