unsigned NumBytes) {
// This optimisation potentially adds lots of load and store
// micro-operations, so it's really only a benefit for code size.
- if (!MF.getFunction().optForMinSize())
+ if (!Subtarget.optForMinSize())
return false;
// If only one register is pushed/popped, LLVM can use an LDR/STR
}
unsigned ResultReg = 0;
- if (Subtarget->useMovt(*FuncInfo.MF))
+ if (Subtarget->useMovt())
ResultReg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
if (ResultReg)
bool IsPositionIndependent = isPositionIndependent();
// Use movw+movt when possible; it avoids constant pool entries.
// Non-darwin targets only support static movt relocations in FastISel.
- if (Subtarget->useMovt(*FuncInfo.MF) &&
+ if (Subtarget->useMovt() &&
(Subtarget->isTargetMachO() || !IsPositionIndependent)) {
unsigned Opc;
unsigned char TF = 0;
if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
if (ARM_AM::isSOImmTwoPartVal(Val)) return 2; // two instrs
}
- if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
+ if (Subtarget->useMovt()) return 2; // MOVW + MOVT
return 3; // Literal pool load
}
auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
auto *BB = CLI.CS.getParent();
bool PreferIndirect =
- Subtarget->isThumb() && MF.getFunction().optForMinSize() &&
+ Subtarget->isThumb() && Subtarget->optForMinSize() &&
count_if(GV->users(), [&BB](const User *U) {
return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
}) > 2;
CallOpc = ARMISD::CALL_NOLINK;
else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
// Emit regular call when code size is the priority
- !MF.getFunction().optForMinSize())
+ !Subtarget->optForMinSize())
// "mov lr, pc; b _foo" to avoid confusing the RSP
CallOpc = ARMISD::CALL_NOLINK;
else
} else if (Subtarget->isRWPI() && !IsRO) {
// SB-relative.
SDValue RelAddr;
- if (Subtarget->useMovt(DAG.getMachineFunction())) {
+ if (Subtarget->useMovt()) {
++NumMovwMovt;
SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
// If we have T2 ops, we can materialize the address directly via movt/movw
// pair. This is always cheaper.
- if (Subtarget->useMovt(DAG.getMachineFunction())) {
+ if (Subtarget->useMovt()) {
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes.
SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- if (Subtarget->useMovt(DAG.getMachineFunction()))
+ if (Subtarget->useMovt())
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
- assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
+ assert(Subtarget->useMovt() &&
"Windows on ARM expects to use movw/movt");
assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
"ROPI/RWPI not currently supported for Windows");
return SDValue();
const auto &ST = static_cast<const ARMSubtarget&>(DAG.getSubtarget());
- const auto &MF = DAG.getMachineFunction();
- const bool MinSize = MF.getFunction().optForMinSize();
+ const bool MinSize = ST.optForMinSize();
const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
: ST.hasDivideInARMMode();
// Load an immediate to varEnd.
unsigned varEnd = MRI.createVirtualRegister(TRC);
- if (Subtarget->useMovt(*MF)) {
+ if (Subtarget->useMovt()) {
unsigned Vtmp = varEnd;
if ((LoopSize & 0xFFFF0000) != 0)
Vtmp = MRI.createVirtualRegister(TRC);
return Subtarget->hasV6T2Ops();
}
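+// Only expand shifts inline when we're not optimising for minimum code size;
+// otherwise the generic lowering falls back to the (smaller) libcall.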
+bool ARMTargetLowering::shouldExpandShift(SelectionDAG &DAG, SDNode *N) const {
+ return !Subtarget->optForMinSize();
+}
+
Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
return HasStandaloneRem;
}
- bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
- if (DAG.getMachineFunction().getFunction().optForMinSize())
- return false;
- return true;
- }
+ bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const;
CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const;
const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
const TargetMachine &TM = MF.getTarget();
- if (!Subtarget.useMovt(MF)) {
+ if (!Subtarget.useMovt()) {
if (TM.isPositionIndependent())
expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12);
else
// FIXME: Eventually this will be just "hasV6T2Ops".
let RecomputePerFunction = 1 in {
- def UseMovt : Predicate<"Subtarget->useMovt(*MF)">;
- def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">;
- def UseMovtInPic : Predicate<"Subtarget->useMovt(*MF) && Subtarget->allowPositionIndependentMovt()">;
- def DontUseMovtInPic : Predicate<"!Subtarget->useMovt(*MF) || !Subtarget->allowPositionIndependentMovt()">;
+ def UseMovt : Predicate<"Subtarget->useMovt()">;
+ def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
+ def UseMovtInPic : Predicate<"Subtarget->useMovt() && Subtarget->allowPositionIndependentMovt()">;
+ def DontUseMovtInPic : Predicate<"!Subtarget->useMovt() || !Subtarget->allowPositionIndependentMovt()">;
def UseFPVMLx: Predicate<"((Subtarget->useFPVMLx() &&"
" TM.Options.AllowFPOpFusion != FPOpFusion::Fast) ||"
- "MF->getFunction().optForMinSize())">;
+ "Subtarget->optForMinSize())">;
}
def UseMulOps : Predicate<"Subtarget->useMulOps()">;
/// arm_i32imm - True for +V6T2, or when isSOImmTwoPartVal()
def arm_i32imm : PatLeaf<(imm), [{
- if (Subtarget->useMovt(*MF))
+ if (Subtarget->useMovt())
return true;
return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
}]> {
// Ideally this would be an IntImmLeaf, but then we wouldn't have access to
// the MachineFunction.
let GISelPredicateCode = [{
- const auto &MF = *MI.getParent()->getParent();
- if (STI.useMovt(MF))
+ if (STI.useMovt())
return true;
const auto &MO = MI.getOperand(1);
auto &MBB = *MIB->getParent();
auto &MF = *MBB.getParent();
- bool UseMovt = STI.useMovt(MF);
+ bool UseMovt = STI.useMovt();
unsigned Size = TM.getPointerSize(0);
unsigned Alignment = 4;
// can still change to a writeback form as that will save us 2 bytes
// of code size. It can create WAW hazards though, so only do it if
// we're minimizing code size.
- if (!MBB.getParent()->getFunction().optForMinSize() || !BaseKill)
+ if (!STI->optForMinSize() || !BaseKill)
return false;
bool HighRegsUsed = false;
(decimate (rotl SPR, 1), 4),
(decimate (rotl SPR, 1), 2))];
let AltOrderSelect = [{
- return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+ return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs();
}];
let DiagnosticString = "operand must be a register in range [s0, s31]";
}
(decimate (rotl HPR, 1), 4),
(decimate (rotl HPR, 1), 2))];
let AltOrderSelect = [{
- return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+ return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs();
}];
let DiagnosticString = "operand must be a register in range [s0, s31]";
}
let AltOrders = [(rotl DPR, 16),
(add (decimate (rotl DPR, 16), 2), (rotl DPR, 16))];
let AltOrderSelect = [{
- return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+ return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs();
}];
let DiagnosticType = "DPR";
}
// Code size optimisation: do not inline memcpy if expansion results in
// more instructions than the library call.
- if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction().optForMinSize()) {
+ if (NumMEMCPYs > 1 && Subtarget.optForMinSize()) {
return SDValue();
}
ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
- const ARMBaseTargetMachine &TM, bool IsLittle)
+ const ARMBaseTargetMachine &TM, bool IsLittle,
+ bool MinSize)
: ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps),
- CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options),
- TM(TM), FrameLowering(initializeFrameLowering(CPU, FS)),
+ CPUString(CPU), OptMinSize(MinSize), IsLittle(IsLittle),
+ TargetTriple(TT), Options(TM.Options), TM(TM),
+ FrameLowering(initializeFrameLowering(CPU, FS)),
// At this point initializeSubtargetDependencies has been called so
// we can query directly.
InstrInfo(isThumb1Only()
bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier(); }
-bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const {
+bool ARMSubtarget::useStride4VFPs() const {
// For general targets, the prologue can grow when VFPs are allocated with
// stride 4 (more vpush instructions). But WatchOS uses a compact unwind
// format, which is more important to get right.
return isTargetWatchABI() ||
- (useWideStrideVFP() && !MF.getFunction().optForMinSize());
+ (useWideStrideVFP() && !OptMinSize);
}
-bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
+bool ARMSubtarget::useMovt() const {
// NOTE Windows on ARM needs to use movw/movt pairs to materialise 32-bit
// immediates as it is inherently position independent, and may be out of
// range otherwise.
return !NoMovt && hasV8MBaselineOps() &&
- (isTargetWindows() || !MF.getFunction().optForMinSize() || genExecuteOnly());
+ (isTargetWindows() || !OptMinSize || genExecuteOnly());
}
bool ARMSubtarget::useFastISel() const {
/// What alignment is preferred for loop bodies, in log2(bytes).
unsigned PrefLoopAlignment = 0;
+ /// OptMinSize - True if we're optimising for minimum code size, i.e. the
+ /// function has the minsize attribute.
+ bool OptMinSize = false;
+
/// IsLittle - The target is Little Endian
bool IsLittle;
/// of the specified triple.
///
ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
- const ARMBaseTargetMachine &TM, bool IsLittle);
+ const ARMBaseTargetMachine &TM, bool IsLittle,
+ bool MinSize = false);
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
/// that still makes it profitable to inline the call.
bool disablePostRAScheduler() const { return DisablePostRAScheduler; }
bool useSoftFloat() const { return UseSoftFloat; }
bool isThumb() const { return InThumbMode; }
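+ /// True if this subtarget was created for a function carrying the minsize
+ /// attribute.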
+ bool optForMinSize() const { return OptMinSize; }
bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
bool isThumb2() const { return InThumbMode && HasThumb2; }
bool hasThumb2() const { return HasThumb2; }
isThumb1Only();
}
- bool useStride4VFPs(const MachineFunction &MF) const;
+ bool useStride4VFPs() const;
- bool useMovt(const MachineFunction &MF) const;
+ bool useMovt() const;
bool supportsTailCall() const { return SupportsTailCall; }
if (SoftFloat)
FS += FS.empty() ? "+soft-float" : ",+soft-float";
- auto &I = SubtargetMap[CPU + FS];
+ // Use the minsize attribute to identify the subtarget, but don't include
+ // it in the feature string.
+ std::string Key = CPU + FS;
+ if (F.optForMinSize())
+ Key += "+minsize";
+
+ auto &I = SubtargetMap[Key];
if (!I) {
// This needs to be done before we create a new subtarget since any
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
- I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle);
+ I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle,
+ F.optForMinSize());
if (!I->isThumb() && !I->hasARMOps())
F.getContext().emitError("Function '" + F.getName() + "' uses ARM "
break;
case ARM::t2LDR_POST:
case ARM::t2STR_POST: {
- if (!MBB.getParent()->getFunction().optForMinSize())
+ if (!MinimizeSize)
return false;
if (!MI->hasOneMemOperand() ||
// Optimizing / minimizing size? Minimizing size implies optimizing for size.
OptimizeSize = MF.getFunction().optForSize();
- MinimizeSize = MF.getFunction().optForMinSize();
+ MinimizeSize = STI->optForMinSize();
BlockInfo.clear();
BlockInfo.resize(MF.getNumBlockIDs());