From: Sam Parker
Date: Fri, 8 Feb 2019 07:57:42 +0000 (+0000)
Subject: [ARM] Add OptMinSize to ARMSubtarget
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=fcb98b7928e8ea2c8b885b16c50a512f3dac1b3c;p=llvm

[ARM] Add OptMinSize to ARMSubtarget

In many places in the backend we need to know whether we're optimising
for code size, and this is currently done by checking the attributes of
the current machine function. A subtarget is created on a per-function
basis, so whether we're compiling for code size is already known at
construction time; record it in the new object so it can be queried
directly.

Differential Revision: https://reviews.llvm.org/D57812

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@353501 91177308-0d34-0410-b5e6-96231b3b80d8
---
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 7278680cb31..9c8fed0e223 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -2265,7 +2265,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
                                       unsigned NumBytes) {
   // This optimisation potentially adds lots of load and store
   // micro-operations, it's only really a great benefit to code-size.
-  if (!MF.getFunction().optForMinSize())
+  if (!Subtarget.optForMinSize())
     return false;
 
   // If only one register is pushed/popped, LLVM can use an LDR/STR
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index cc1849fcf99..cd01b70b378 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -497,7 +497,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
   }
 
   unsigned ResultReg = 0;
-  if (Subtarget->useMovt(*FuncInfo.MF))
+  if (Subtarget->useMovt())
     ResultReg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
 
   if (ResultReg)
@@ -555,7 +555,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
   bool IsPositionIndependent = isPositionIndependent();
   // Use movw+movt when possible, it avoids constant pool entries.
   // Non-darwin targets only support static movt relocations in FastISel.
-  if (Subtarget->useMovt(*FuncInfo.MF) &&
+  if (Subtarget->useMovt() &&
       (Subtarget->isTargetMachO() || !IsPositionIndependent)) {
     unsigned Opc;
     unsigned char TF = 0;
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 17a82e4c800..648435a3ed1 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -465,7 +465,7 @@ unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
     if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
   }
-  if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
+  if (Subtarget->useMovt()) return 2;    // MOVW + MOVT
   return 3; // Literal pool load
 }
 
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 68cc502d501..01ee6a33c7c 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -2069,7 +2069,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
     auto *BB = CLI.CS.getParent();
     bool PreferIndirect =
-        Subtarget->isThumb() && MF.getFunction().optForMinSize() &&
+        Subtarget->isThumb() && Subtarget->optForMinSize() &&
         count_if(GV->users(), [&BB](const User *U) {
           return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
         }) > 2;
@@ -2141,7 +2141,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
       CallOpc = ARMISD::CALL_NOLINK;
     else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
              // Emit regular call when code size is the priority
-             !MF.getFunction().optForMinSize())
+             !Subtarget->optForMinSize())
       // "mov lr, pc; b _foo" to avoid confusing the RSP
       CallOpc = ARMISD::CALL_NOLINK;
     else
@@ -3224,7 +3224,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
   } else if (Subtarget->isRWPI() && !IsRO) {
     // SB-relative.
     SDValue RelAddr;
-    if (Subtarget->useMovt(DAG.getMachineFunction())) {
+    if (Subtarget->useMovt()) {
       ++NumMovwMovt;
       SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
       RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
@@ -3244,7 +3244,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
 
   // If we have T2 ops, we can materialize the address directly via movt/movw
   // pair. This is always cheaper.
-  if (Subtarget->useMovt(DAG.getMachineFunction())) {
+  if (Subtarget->useMovt()) {
     ++NumMovwMovt;
     // FIXME: Once remat is capable of dealing with instructions with register
     // operands, expand this into two nodes.
@@ -3267,7 +3267,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
   SDLoc dl(Op);
   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
 
-  if (Subtarget->useMovt(DAG.getMachineFunction()))
+  if (Subtarget->useMovt())
     ++NumMovwMovt;
 
   // FIXME: Once remat is capable of dealing with instructions with register
@@ -3287,7 +3287,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
                                                      SelectionDAG &DAG) const {
   assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
-  assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
+  assert(Subtarget->useMovt() &&
          "Windows on ARM expects to use movw/movt");
   assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
          "ROPI/RWPI not currently supported for Windows");
@@ -7808,8 +7808,7 @@ ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
     return SDValue();
 
   const auto &ST = static_cast<const ARMSubtarget&>(DAG.getSubtarget());
-  const auto &MF = DAG.getMachineFunction();
-  const bool MinSize = MF.getFunction().optForMinSize();
+  const bool MinSize = ST.optForMinSize();
   const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
                                       : ST.hasDivideInARMMode();
 
@@ -8979,7 +8978,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
 
   // Load an immediate to varEnd.
   unsigned varEnd = MRI.createVirtualRegister(TRC);
-  if (Subtarget->useMovt(*MF)) {
+  if (Subtarget->useMovt()) {
     unsigned Vtmp = varEnd;
     if ((LoopSize & 0xFFFF0000) != 0)
       Vtmp = MRI.createVirtualRegister(TRC);
@@ -14714,6 +14713,10 @@ bool ARMTargetLowering::isCheapToSpeculateCtlz() const {
   return Subtarget->hasV6T2Ops();
 }
 
+bool ARMTargetLowering::shouldExpandShift(SelectionDAG &DAG, SDNode *N) const {
+  return !Subtarget->optForMinSize();
+}
+
 Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                                          AtomicOrdering Ord) const {
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 5a4b3261078..992eaa53098 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -567,11 +567,7 @@ class VectorType;
       return HasStandaloneRem;
     }
 
-    bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
-      if (DAG.getMachineFunction().getFunction().optForMinSize())
-        return false;
-      return true;
-    }
+    bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
 
     CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const;
     CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const;
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 9c085355d5c..388c889349b 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -94,7 +94,7 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const {
   const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
   const TargetMachine &TM = MF.getTarget();
 
-  if (!Subtarget.useMovt(MF)) {
+  if (!Subtarget.useMovt()) {
     if (TM.isPositionIndependent())
       expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12);
     else
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 729233360f1..6555590fc5d 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -354,14 +354,14 @@ def UseNegativeImmediates :
 
 // FIXME: Eventually this will be just "hasV6T2Ops".
 let RecomputePerFunction = 1 in {
-  def UseMovt : Predicate<"Subtarget->useMovt(*MF)">;
-  def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">;
-  def UseMovtInPic : Predicate<"Subtarget->useMovt(*MF) && Subtarget->allowPositionIndependentMovt()">;
-  def DontUseMovtInPic : Predicate<"!Subtarget->useMovt(*MF) || !Subtarget->allowPositionIndependentMovt()">;
+  def UseMovt : Predicate<"Subtarget->useMovt()">;
+  def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
+  def UseMovtInPic : Predicate<"Subtarget->useMovt() && Subtarget->allowPositionIndependentMovt()">;
+  def DontUseMovtInPic : Predicate<"!Subtarget->useMovt() || !Subtarget->allowPositionIndependentMovt()">;
 
   def UseFPVMLx: Predicate<"((Subtarget->useFPVMLx() &&"
                            " TM.Options.AllowFPOpFusion != FPOpFusion::Fast) ||"
-                           "MF->getFunction().optForMinSize())">;
+                           "Subtarget->optForMinSize())">;
 }
 
 def UseMulOps : Predicate<"Subtarget->useMulOps()">;
@@ -718,15 +718,14 @@ def mod_imm_neg : Operand<i32>, PatLeaf<(imm), [{
 
 /// arm_i32imm - True for +V6T2, or when isSOImmTwoParVal()
 def arm_i32imm : PatLeaf<(imm), [{
-  if (Subtarget->useMovt(*MF))
+  if (Subtarget->useMovt())
     return true;
   return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
 }]> {
   // Ideally this would be an IntImmLeaf, but then we wouldn't have access to
   // the MachineFunction.
   let GISelPredicateCode = [{
-    const auto &MF = *MI.getParent()->getParent();
-    if (STI.useMovt(MF))
+    if (STI.useMovt())
       return true;
 
     const auto &MO = MI.getOperand(1);
diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp
index 960953235de..89735c16308 100644
--- a/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -581,7 +581,7 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB,
   auto &MBB = *MIB->getParent();
   auto &MF = *MBB.getParent();
 
-  bool UseMovt = STI.useMovt(MF);
+  bool UseMovt = STI.useMovt();
 
   unsigned Size = TM.getPointerSize(0);
   unsigned Alignment = 4;
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index f5c5b1f73c6..73db4abbf69 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1286,7 +1286,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
     // can still change to a writeback form as that will save us 2 bytes
     // of code size. It can create WAW hazards though, so only do it if
    // we're minimizing code size.
-    if (!MBB.getParent()->getFunction().optForMinSize() || !BaseKill)
+    if (!STI->optForMinSize() || !BaseKill)
       return false;
 
     bool HighRegsUsed = false;
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index b5179c37621..668d374ef16 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -301,7 +301,7 @@ def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)> {
                         (decimate (rotl SPR, 1), 4),
                         (decimate (rotl SPR, 1), 2))];
   let AltOrderSelect = [{
-    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs();
   }];
   let DiagnosticString = "operand must be a register in range [s0, s31]";
 }
@@ -313,7 +313,7 @@ def HPR : RegisterClass<"ARM", [f16], 32, (sequence "S%u", 0, 31)> {
                         (decimate (rotl HPR, 1), 4),
                         (decimate (rotl HPR, 1), 2))];
   let AltOrderSelect = [{
-    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs();
   }];
   let DiagnosticString = "operand must be a register in range [s0, s31]";
 }
@@ -335,7 +335,7 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 6
   let AltOrders = [(rotl DPR, 16),
                    (add (decimate (rotl DPR, 16), 2), (rotl DPR, 16))];
   let AltOrderSelect = [{
-    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs();
   }];
   let DiagnosticType = "DPR";
 }
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 6e996694861..332e4e703ed 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -170,7 +170,7 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
 
   // Code size optimisation: do not inline memcpy if expansion results in
   // more instructions than the libary call.
-  if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction().optForMinSize()) {
+  if (NumMEMCPYs > 1 && Subtarget.optForMinSize()) {
     return SDValue();
   }
 
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index b1d412a4b93..692585ea79e 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -92,10 +92,12 @@ ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU,
 
 ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
                            const std::string &FS,
-                           const ARMBaseTargetMachine &TM, bool IsLittle)
+                           const ARMBaseTargetMachine &TM, bool IsLittle,
+                           bool MinSize)
     : ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps),
-      CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options),
-      TM(TM), FrameLowering(initializeFrameLowering(CPU, FS)),
+      CPUString(CPU), OptMinSize(MinSize), IsLittle(IsLittle),
+      TargetTriple(TT), Options(TM.Options), TM(TM),
+      FrameLowering(initializeFrameLowering(CPU, FS)),
       // At this point initializeSubtargetDependencies has been called so
       // we can query directly.
       InstrInfo(isThumb1Only()
@@ -373,20 +375,20 @@ bool ARMSubtarget::enablePostRAScheduler() const {
 
 bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier(); }
 
-bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const {
+bool ARMSubtarget::useStride4VFPs() const {
   // For general targets, the prologue can grow when VFPs are allocated with
   // stride 4 (more vpush instructions). But WatchOS uses a compact unwind
   // format which it's more important to get right.
   return isTargetWatchABI() ||
-    (useWideStrideVFP() && !MF.getFunction().optForMinSize());
+    (useWideStrideVFP() && !OptMinSize);
 }
 
-bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
+bool ARMSubtarget::useMovt() const {
   // NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit
   // immediates as it is inherently position independent, and may be out of
   // range otherwise.
   return !NoMovt && hasV8MBaselineOps() &&
-    (isTargetWindows() || !MF.getFunction().optForMinSize() || genExecuteOnly());
+    (isTargetWindows() || !OptMinSize || genExecuteOnly());
 }
 
 bool ARMSubtarget::useFastISel() const {
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 240fb600ec6..d2936d3ca72 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -445,6 +445,10 @@ protected:
   /// What alignment is preferred for loop bodies, in log2(bytes).
   unsigned PrefLoopAlignment = 0;
 
+  /// OptMinSize - True if we're optimising for minimum code size, equal to
+  /// the function attribute.
+  bool OptMinSize = false;
+
   /// IsLittle - The target is Little Endian
   bool IsLittle;
 
@@ -467,7 +471,8 @@ public:
   /// of the specified triple.
   ///
   ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
-               const ARMBaseTargetMachine &TM, bool IsLittle);
+               const ARMBaseTargetMachine &TM, bool IsLittle,
+               bool MinSize = false);
 
   /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
   /// that still makes it profitable to inline the call.
@@ -709,6 +714,7 @@ public:
   bool disablePostRAScheduler() const { return DisablePostRAScheduler; }
   bool useSoftFloat() const { return UseSoftFloat; }
   bool isThumb() const { return InThumbMode; }
+  bool optForMinSize() const { return OptMinSize; }
   bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
   bool isThumb2() const { return InThumbMode && HasThumb2; }
   bool hasThumb2() const { return HasThumb2; }
@@ -735,9 +741,9 @@ public:
            isThumb1Only();
   }
 
-  bool useStride4VFPs(const MachineFunction &MF) const;
+  bool useStride4VFPs() const;
 
-  bool useMovt(const MachineFunction &MF) const;
+  bool useMovt() const;
 
   bool supportsTailCall() const { return SupportsTailCall; }
 
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 433a1915a1a..9954eee2e5f 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -263,13 +263,20 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
   if (SoftFloat)
     FS += FS.empty() ? "+soft-float" : ",+soft-float";
 
-  auto &I = SubtargetMap[CPU + FS];
+  // Use the optminsize to identify the subtarget, but don't use it in the
+  // feature string.
+  std::string Key = CPU + FS;
+  if (F.optForMinSize())
+    Key += "+minsize";
+
+  auto &I = SubtargetMap[Key];
   if (!I) {
     // This needs to be done before we create a new subtarget since any
     // creation will depend on the TM and the code generation flags on the
     // function that reside in TargetOptions.
     resetTargetOptions(F);
-    I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle);
+    I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle,
+                                        F.optForMinSize());
 
     if (!I->isThumb() && !I->hasARMOps())
       F.getContext().emitError("Function '" + F.getName() + "' uses ARM "
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index bb36a4f3b61..be9c1ebd69f 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -453,7 +453,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
     break;
   case ARM::t2LDR_POST:
   case ARM::t2STR_POST: {
-    if (!MBB.getParent()->getFunction().optForMinSize())
+    if (!MinimizeSize)
       return false;
 
     if (!MI->hasOneMemOperand() ||
@@ -1128,7 +1128,7 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
 
   // Optimizing / minimizing size? Minimizing size implies optimizing for size.
   OptimizeSize = MF.getFunction().optForSize();
-  MinimizeSize = MF.getFunction().optForMinSize();
+  MinimizeSize = STI->optForMinSize();
 
   BlockInfo.clear();
   BlockInfo.resize(MF.getNumBlockIDs());
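
The core of the change is the subtarget cache in ARMBaseTargetMachine::getSubtargetImpl: the
function's minsize attribute becomes part of the cache key, so two functions that differ only in
that attribute get distinct ARMSubtarget objects, and each object records the flag so later
queries (optForMinSize, useMovt, useStride4VFPs) no longer need a MachineFunction. Below is a
minimal sketch of that caching pattern, under the assumption of hypothetical MySubtarget and
MyTargetMachine classes; the names and map layout are illustrative only, not LLVM's real API.

    // Hypothetical sketch of the per-function subtarget caching pattern;
    // class names are illustrative, not LLVM's. Needs -std=c++14 for make_unique.
    #include <cassert>
    #include <map>
    #include <memory>
    #include <string>
    #include <utility>

    struct MySubtarget {
      std::string CPU, FS;
      bool OptMinSize; // cached at construction, mirrors the IR function attribute

      MySubtarget(std::string CPU, std::string FS, bool MinSize)
          : CPU(std::move(CPU)), FS(std::move(FS)), OptMinSize(MinSize) {}

      // Queries that previously needed a MachineFunction can now read the flag.
      bool optForMinSize() const { return OptMinSize; }
    };

    struct MyTargetMachine {
      // Cache keyed on CPU + feature string, plus "+minsize" when the function
      // carries the attribute, so the two variants never share a subtarget.
      std::map<std::string, std::unique_ptr<MySubtarget>> SubtargetMap;

      const MySubtarget &getSubtarget(const std::string &CPU,
                                      const std::string &FS, bool MinSize) {
        std::string Key = CPU + FS;
        if (MinSize)
          Key += "+minsize";
        auto &I = SubtargetMap[Key];
        if (!I)
          I = std::make_unique<MySubtarget>(CPU, FS, MinSize);
        return *I;
      }
    };

    int main() {
      MyTargetMachine TM;
      const MySubtarget &Hot = TM.getSubtarget("cortex-a53", "+thumb2", false);
      const MySubtarget &Cold = TM.getSubtarget("cortex-a53", "+thumb2", true);
      assert(!Hot.optForMinSize() && Cold.optForMinSize());
      assert(&Hot != &Cold); // same CPU/features, distinct cached subtargets
      return 0;
    }

Keying the cache on the attribute rather than folding "+minsize" into the feature string keeps the
feature string itself unchanged while still guaranteeing that the cached subtarget matches the
function being compiled.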