From: Jessica Paquette
Date: Fri, 28 Jul 2017 03:21:58 +0000 (+0000)
Subject: [MachineOutliner] NFC: Split up getOutliningBenefit
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b1318fc0e6007a7dc11c50473b619551a27ff5a4;p=llvm

[MachineOutliner] NFC: Split up getOutliningBenefit

This is some more cleanup in preparation for some actual functional changes.

This splits getOutliningBenefit into two cost functions: getOutliningCallOverhead
and getOutliningFrameOverhead. These functions return the number of instructions
that would be required to call a specific function and the number of instructions
that would be required to construct a frame for a specific function. The actual
outlining benefit logic is moved into the outliner, which calls these functions.

The goal of refactoring getOutliningBenefit is to:

- Get us closer to getting rid of the IsTailCall flag
- Further split up "target-specific" things and "general algorithm" things

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@309356 91177308-0d34-0410-b5e6-96231b3b80d8
---
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index 1843a2eed9b..9f7d8e8b92f 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -55,7 +55,7 @@ class TargetRegisterInfo; class TargetSchedModel; class TargetSubtargetInfo; -template class SmallVectorImpl; +template class SmallVectorImpl; //--------------------------------------------------------------------------- /// @@ -66,8 +66,7 @@ public: TargetInstrInfo(unsigned CFSetupOpcode = ~0u, unsigned CFDestroyOpcode = ~0u, unsigned CatchRetOpcode = ~0u, unsigned ReturnOpcode = ~0u) : CallFrameSetupOpcode(CFSetupOpcode), - CallFrameDestroyOpcode(CFDestroyOpcode), - CatchRetOpcode(CatchRetOpcode), + CallFrameDestroyOpcode(CFDestroyOpcode), CatchRetOpcode(CatchRetOpcode), ReturnOpcode(ReturnOpcode) {} TargetInstrInfo(const TargetInstrInfo &) = delete; TargetInstrInfo &operator=(const TargetInstrInfo &) = delete; @@ -79,8 +78,7 @@ public: /// Given a machine instruction descriptor, returns the register /// class constraint for OpNum, or NULL. - const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, - unsigned OpNum, + const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const; @@ -139,8 +137,7 @@ protected: /// the fixed result pair is equal to or equivalent to the source pair of /// indices: (CommutableOpIdx1, CommutableOpIdx2). It is assumed here that /// the pairs (x,y) and (y,x) are equivalent. - static bool fixCommutedOpIndices(unsigned &ResultIdx1, - unsigned &ResultIdx2, + static bool fixCommutedOpIndices(unsigned &ResultIdx1, unsigned &ResultIdx2, unsigned CommutableOpIdx1, unsigned CommutableOpIdx2); @@ -164,7 +161,7 @@ public: /// Returns true if the argument is a frame pseudo instruction. bool isFrameInstr(const MachineInstr &I) const { return I.getOpcode() == getCallFrameSetupOpcode() || - I.getOpcode() == getCallFrameDestroyOpcode(); + I.getOpcode() == getCallFrameDestroyOpcode(); } /// Returns true if the argument is a frame setup pseudo instruction. @@ -191,7 +188,8 @@ public: /// prior to the pair.
int64_t getFrameTotalSize(const MachineInstr &I) const { if (isFrameSetup(I)) { - assert(I.getOperand(1).getImm() >= 0 && "Frame size must not be negative"); + assert(I.getOperand(1).getImm() >= 0 && + "Frame size must not be negative"); return getFrameSize(I) + I.getOperand(1).getImm(); } return getFrameSize(I); @@ -211,9 +209,8 @@ public: /// destination. e.g. X86::MOVSX64rr32. If this returns true, then it's /// expected the pre-extension value is available as a subreg of the result /// register. This also returns the sub-register index in SubIdx. - virtual bool isCoalescableExtInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SubIdx) const { + virtual bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, + unsigned &DstReg, unsigned &SubIdx) const { return false; } @@ -315,9 +312,7 @@ public: /// MachineSink determines on its own whether the instruction is safe to sink; /// this gives the target a hook to override the default behavior with regards /// to which instructions should be sunk. - virtual bool shouldSink(const MachineInstr &MI) const { - return true; - } + virtual bool shouldSink(const MachineInstr &MI) const { return true; } /// Re-issue the specified 'original' instruction at the /// specific location targeting a new destination register. @@ -456,9 +451,8 @@ public: /// \note The generic implementation does not provide any support for /// MI.isExtractSubregLike(). In other words, one has to override /// getExtractSubregLikeInputs for target specific instructions. - bool - getExtractSubregInputs(const MachineInstr &MI, unsigned DefIdx, - RegSubRegPairAndIdx &InputReg) const; + bool getExtractSubregInputs(const MachineInstr &MI, unsigned DefIdx, + RegSubRegPairAndIdx &InputReg) const; /// Build the equivalent inputs of a INSERT_SUBREG for the given \p MI /// and \p DefIdx. @@ -476,10 +470,9 @@ public: /// \note The generic implementation does not provide any support for /// MI.isInsertSubregLike(). In other words, one has to override /// getInsertSubregLikeInputs for target specific instructions. - bool - getInsertSubregInputs(const MachineInstr &MI, unsigned DefIdx, - RegSubRegPair &BaseReg, - RegSubRegPairAndIdx &InsertedReg) const; + bool getInsertSubregInputs(const MachineInstr &MI, unsigned DefIdx, + RegSubRegPair &BaseReg, + RegSubRegPairAndIdx &InsertedReg) const; /// Return true if two machine instructions would produce identical values. /// By default, this is only true when the two instructions @@ -625,8 +618,8 @@ public: MachineBasicBlock *DestBB, const DebugLoc &DL, int *BytesAdded = nullptr) const { - return insertBranch(MBB, DestBB, nullptr, - ArrayRef(), DL, BytesAdded); + return insertBranch(MBB, DestBB, nullptr, ArrayRef(), DL, + BytesAdded); } /// Analyze the loop code, return true if it cannot be understoo. Upon @@ -641,8 +634,8 @@ public: /// finished. Return the value/register of the the new loop count. We need /// this function when peeling off one or more iterations of a loop. This /// function assumes the nth iteration is peeled first. 
- virtual unsigned reduceLoopCount(MachineBasicBlock &MBB, - MachineInstr *IndVar, MachineInstr &Cmp, + virtual unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineInstr *IndVar, + MachineInstr &Cmp, SmallVectorImpl &Cond, SmallVectorImpl &PrevInsts, unsigned Iter, unsigned MaxIter) const { @@ -667,10 +660,9 @@ public: /// of the specified basic block, where the probability of the instructions /// being executed is given by Probability, and Confidence is a measure /// of our confidence that it will be properly predicted. - virtual - bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, - unsigned ExtraPredCycles, - BranchProbability Probability) const { + virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, + unsigned ExtraPredCycles, + BranchProbability Probability) const { return false; } @@ -680,12 +672,11 @@ public: /// predicates, where the probability of the true path being taken is given /// by Probability, and Confidence is a measure of our confidence that it /// will be properly predicted. - virtual bool - isProfitableToIfCvt(MachineBasicBlock &TMBB, - unsigned NumTCycles, unsigned ExtraTCycles, - MachineBasicBlock &FMBB, - unsigned NumFCycles, unsigned ExtraFCycles, - BranchProbability Probability) const { + virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTCycles, + unsigned ExtraTCycles, + MachineBasicBlock &FMBB, unsigned NumFCycles, + unsigned ExtraFCycles, + BranchProbability Probability) const { return false; } @@ -695,9 +686,9 @@ public: /// The probability of the instructions being executed is given by /// Probability, and Confidence is a measure of our confidence that it /// will be properly predicted. - virtual bool - isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, - BranchProbability Probability) const { + virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, + unsigned NumCycles, + BranchProbability Probability) const { return false; } @@ -735,9 +726,8 @@ public: /// @param TrueCycles Latency from TrueReg to select output. /// @param FalseCycles Latency from FalseReg to select output. virtual bool canInsertSelect(const MachineBasicBlock &MBB, - ArrayRef Cond, - unsigned TrueReg, unsigned FalseReg, - int &CondCycles, + ArrayRef Cond, unsigned TrueReg, + unsigned FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const { return false; } @@ -953,8 +943,7 @@ public: /// Set special operand attributes for new instructions after reassociation. virtual void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2, MachineInstr &NewMI1, - MachineInstr &NewMI2) const { - } + MachineInstr &NewMI2) const {} /// Return true when a target supports MachineCombiner. virtual bool useMachineCombiner() const { return false; } @@ -1007,9 +996,9 @@ protected: /// \pre MI.isExtractSubregLike(). /// /// \see TargetInstrInfo::getExtractSubregInputs. - virtual bool getExtractSubregLikeInputs( - const MachineInstr &MI, unsigned DefIdx, - RegSubRegPairAndIdx &InputReg) const { + virtual bool getExtractSubregLikeInputs(const MachineInstr &MI, + unsigned DefIdx, + RegSubRegPairAndIdx &InputReg) const { return false; } @@ -1040,7 +1029,7 @@ public: } virtual bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, - SmallVectorImpl &NewNodes) const { + SmallVectorImpl &NewNodes) const { return false; } @@ -1050,9 +1039,9 @@ public: /// possible. 
If LoadRegIndex is non-null, it is filled in with the operand /// index of the operand which will hold the register holding the loaded /// value. - virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, - bool UnfoldLoad, bool UnfoldStore, - unsigned *LoadRegIndex = nullptr) const { + virtual unsigned + getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore, + unsigned *LoadRegIndex = nullptr) const { return 0; } @@ -1061,7 +1050,8 @@ public: /// pointers are the same and the only differences between the two addresses /// are the offset. It also returns the offsets by reference. virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, - int64_t &Offset1, int64_t &Offset2) const { + int64_t &Offset1, + int64_t &Offset2) const { return false; } @@ -1115,8 +1105,8 @@ public: /// Reverses the branch condition of the specified condition list, /// returning false on success and true if it cannot be reversed. - virtual - bool reverseBranchCondition(SmallVectorImpl &Cond) const { + virtual bool + reverseBranchCondition(SmallVectorImpl &Cond) const { return true; } @@ -1128,14 +1118,10 @@ public: virtual void getNoop(MCInst &NopInst) const; /// Return true for post-incremented instructions. - virtual bool isPostIncrement(const MachineInstr &MI) const { - return false; - } + virtual bool isPostIncrement(const MachineInstr &MI) const { return false; } /// Returns true if the instruction is already predicated. - virtual bool isPredicated(const MachineInstr &MI) const { - return false; - } + virtual bool isPredicated(const MachineInstr &MI) const { return false; } /// Returns true if the instruction is a /// terminator instruction that has not been predicated. @@ -1147,9 +1133,8 @@ public: } /// Returns true if the tail call can be made conditional on BranchCond. - virtual bool - canMakeTailCallConditional(SmallVectorImpl &Cond, - const MachineInstr &TailCall) const { + virtual bool canMakeTailCallConditional(SmallVectorImpl &Cond, + const MachineInstr &TailCall) const { return false; } @@ -1167,9 +1152,8 @@ public: /// Returns true if the first specified predicate /// subsumes the second, e.g. GE subsumes GT. - virtual - bool SubsumesPredicate(ArrayRef Pred1, - ArrayRef Pred2) const { + virtual bool SubsumesPredicate(ArrayRef Pred1, + ArrayRef Pred2) const { return false; } @@ -1207,25 +1191,25 @@ public: /// Allocate and return a hazard recognizer to use for this target when /// scheduling the machine instructions before register allocation. - virtual ScheduleHazardRecognizer* + virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const; /// Allocate and return a hazard recognizer to use for this target when /// scheduling the machine instructions before register allocation. - virtual ScheduleHazardRecognizer* - CreateTargetMIHazardRecognizer(const InstrItineraryData*, + virtual ScheduleHazardRecognizer * + CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAG *DAG) const; /// Allocate and return a hazard recognizer to use for this target when /// scheduling the machine instructions after register allocation. - virtual ScheduleHazardRecognizer* - CreateTargetPostRAHazardRecognizer(const InstrItineraryData*, + virtual ScheduleHazardRecognizer * + CreateTargetPostRAHazardRecognizer(const InstrItineraryData *, const ScheduleDAG *DAG) const; /// Allocate and return a hazard recognizer to use for by non-scheduling /// passes. 
- virtual ScheduleHazardRecognizer* + virtual ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const { return nullptr; } @@ -1502,7 +1486,7 @@ public: /// \brief Return the value to use for the MachineCSE's LookAheadLimit, /// which is a heuristic used for CSE'ing phys reg defs. - virtual unsigned getMachineCSELookAheadLimit () const { + virtual unsigned getMachineCSELookAheadLimit() const { // The default lookahead is small to prevent unprofitable quadratic // behavior. return 5; @@ -1569,13 +1553,24 @@ public: return false; } - /// \brief Return how many instructions would be saved by outlining a - /// sequence containing \p SequenceSize instructions that appears - /// \p Occurrences times in a module. - virtual unsigned getOutliningBenefit(size_t SequenceSize, size_t Occurrences, - bool CanBeTailCall) const { + /// \brief Returns the number of instructions that will be taken to call a + /// function defined by the sequence on the closed interval [ \p StartIt, \p + /// EndIt]. + virtual size_t + getOutliningCallOverhead(MachineBasicBlock::iterator &StartIt, + MachineBasicBlock::iterator &EndIt) const { + llvm_unreachable( + "Target didn't implement TargetInstrInfo::getOutliningCallOverhead!"); + } + + /// \brief Returns the number of instructions that will be taken to construct + /// an outlined function frame for a function defined on the closed interval + /// [ \p StartIt, \p EndIt]. + virtual size_t + getOutliningFrameOverhead(MachineBasicBlock::iterator &StartIt, + MachineBasicBlock::iterator &EndIt) const { llvm_unreachable( - "Target didn't implement TargetInstrInfo::getOutliningBenefit!"); + "Target didn't implement TargetInstrInfo::getOutliningCallOverhead!"); } /// Represents how an instruction should be mapped by the outliner. @@ -1583,7 +1578,7 @@ public: /// \p Illegal instructions are those which cannot be outlined. /// \p Invisible instructions are instructions which can be outlined, but /// shouldn't actually impact the outlining result. - enum MachineOutlinerInstrType {Legal, Illegal, Invisible}; + enum MachineOutlinerInstrType { Legal, Illegal, Invisible }; /// Returns how or if \p MI should be outlined. virtual MachineOutlinerInstrType getOutliningType(MachineInstr &MI) const { @@ -1635,25 +1630,23 @@ private: }; /// \brief Provide DenseMapInfo for TargetInstrInfo::RegSubRegPair. -template<> -struct DenseMapInfo { +template <> struct DenseMapInfo { using RegInfo = DenseMapInfo; static inline TargetInstrInfo::RegSubRegPair getEmptyKey() { return TargetInstrInfo::RegSubRegPair(RegInfo::getEmptyKey(), - RegInfo::getEmptyKey()); + RegInfo::getEmptyKey()); } static inline TargetInstrInfo::RegSubRegPair getTombstoneKey() { return TargetInstrInfo::RegSubRegPair(RegInfo::getTombstoneKey(), - RegInfo::getTombstoneKey()); + RegInfo::getTombstoneKey()); } /// \brief Reuse getHashValue implementation from /// std::pair. static unsigned getHashValue(const TargetInstrInfo::RegSubRegPair &Val) { - std::pair PairVal = - std::make_pair(Val.Reg, Val.SubReg); + std::pair PairVal = std::make_pair(Val.Reg, Val.SubReg); return DenseMapInfo>::getHashValue(PairVal); } diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp index ff334a3a310..8df57a27e8a 100644 --- a/lib/CodeGen/MachineOutliner.cpp +++ b/lib/CodeGen/MachineOutliner.cpp @@ -114,7 +114,7 @@ struct OutlinedFunction { /// This is initialized after we go through and create the actual function. 
MachineFunction *MF = nullptr; - /// A number assigned to this function which appears at the end of its name. + /// A numbefr assigned to this function which appears at the end of its name. size_t Name; /// The number of candidates for this OutlinedFunction. @@ -813,11 +813,13 @@ struct MachineOutliner : public ModulePass { /// /// \param[in,out] CandidateList A list of outlining candidates. /// \param[in,out] FunctionList A list of functions to be outlined. + /// \param Mapper Contains instruction mapping info for outlining. /// \param MaxCandidateLen The length of the longest candidate. /// \param TII TargetInstrInfo for the module. void pruneOverlaps(std::vector &CandidateList, std::vector &FunctionList, - unsigned MaxCandidateLen, const TargetInstrInfo &TII); + InstructionMapper &Mapper, unsigned MaxCandidateLen, + const TargetInstrInfo &TII); /// Construct a suffix tree on the instructions in \p M and outline repeated /// strings from that tree. @@ -859,23 +861,40 @@ MachineOutliner::findCandidates(SuffixTree &ST, const TargetInstrInfo &TII, if (Parent.OccurrenceCount < 2 || Parent.isRoot() || !Parent.IsInTree) continue; - // How many instructions would outlining this string save? + // Figure out if this candidate is beneficial. size_t StringLen = Leaf->ConcatLen - Leaf->size(); - unsigned EndVal = ST.Str[Leaf->SuffixIdx + StringLen - 1]; - - // Determine if this is going to be tail called. - // FIXME: The target should decide this. The outlining pass shouldn't care - // about things like tail calling. It should be representation-agnostic. - MachineInstr *LastInstr = Mapper.IntegerInstructionMap[EndVal]; - assert(LastInstr && "Last instruction in sequence was unmapped!"); - bool IsTailCall = LastInstr->isTerminator(); - unsigned Benefit = - TII.getOutliningBenefit(StringLen, Parent.OccurrenceCount, IsTailCall); - - // If it's not beneficial, skip it. - if (Benefit < 1) + size_t CallOverhead = 0; + size_t FrameOverhead = 0; + size_t SequenceOverhead = StringLen; + + // Figure out the call overhead for each instance of the sequence. + for (auto &ChildPair : Parent.Children) { + SuffixTreeNode *M = ChildPair.second; + + if (M && M->IsInTree && M->isLeaf()) { + // Each sequence is over [StartIt, EndIt]. + MachineBasicBlock::iterator StartIt = Mapper.InstrList[M->SuffixIdx]; + MachineBasicBlock::iterator EndIt = + Mapper.InstrList[M->SuffixIdx + StringLen - 1]; + CallOverhead += TII.getOutliningCallOverhead(StartIt, EndIt); + } + } + + // Figure out how many instructions it'll take to construct an outlined + // function frame for this sequence. 
+ MachineBasicBlock::iterator StartIt = Mapper.InstrList[Leaf->SuffixIdx]; + MachineBasicBlock::iterator EndIt = + Mapper.InstrList[Leaf->SuffixIdx + StringLen - 1]; + FrameOverhead = TII.getOutliningFrameOverhead(StartIt, EndIt); + + size_t OutliningCost = CallOverhead + FrameOverhead + SequenceOverhead; + size_t NotOutliningCost = SequenceOverhead * Parent.OccurrenceCount; + + if (NotOutliningCost <= OutliningCost) continue; + size_t Benefit = NotOutliningCost - OutliningCost; + if (StringLen > MaxLen) MaxLen = StringLen; @@ -910,6 +929,7 @@ MachineOutliner::findCandidates(SuffixTree &ST, const TargetInstrInfo &TII, void MachineOutliner::pruneOverlaps(std::vector &CandidateList, std::vector &FunctionList, + InstructionMapper &Mapper, unsigned MaxCandidateLen, const TargetInstrInfo &TII) { // TODO: Experiment with interval trees or other interval-checking structures @@ -993,8 +1013,18 @@ void MachineOutliner::pruneOverlaps(std::vector &CandidateList, assert(F2.OccurrenceCount > 0 && "Can't remove OutlinedFunction with no occurrences!"); F2.OccurrenceCount--; - F2.Benefit = TII.getOutliningBenefit(F2.Sequence.size(), - F2.OccurrenceCount, F2.IsTailCall); + + // Remove the call overhead from the removed sequence. + MachineBasicBlock::iterator StartIt = Mapper.InstrList[C2.StartIdx]; + MachineBasicBlock::iterator EndIt = + Mapper.InstrList[C2.StartIdx + C2.Len - 1]; + F2.Benefit += TII.getOutliningCallOverhead(StartIt, EndIt); + // Add back one instance of the sequence. + + if (F2.Sequence.size() > F2.Benefit) + F2.Benefit = 0; + else + F2.Benefit -= F2.Sequence.size(); C2.InCandidateList = false; @@ -1009,8 +1039,19 @@ void MachineOutliner::pruneOverlaps(std::vector &CandidateList, assert(F1.OccurrenceCount > 0 && "Can't remove OutlinedFunction with no occurrences!"); F1.OccurrenceCount--; - F1.Benefit = TII.getOutliningBenefit(F1.Sequence.size(), - F1.OccurrenceCount, F1.IsTailCall); + + // Remove the call overhead from the removed sequence. + MachineBasicBlock::iterator StartIt = Mapper.InstrList[C1.StartIdx]; + MachineBasicBlock::iterator EndIt = + Mapper.InstrList[C1.StartIdx + C1.Len - 1]; + F2.Benefit += TII.getOutliningCallOverhead(StartIt, EndIt); + + // Add back one instance of the sequence. + if (F1.Sequence.size() > F1.Benefit) + F1.Benefit = 0; + else + F1.Benefit -= F1.Sequence.size(); + C1.InCandidateList = false; DEBUG(dbgs() << "- Removed C1. \n"; @@ -1206,7 +1247,7 @@ bool MachineOutliner::runOnModule(Module &M) { buildCandidateList(CandidateList, FunctionList, ST, Mapper, *TII); // Remove candidates that overlap with other candidates. - pruneOverlaps(CandidateList, FunctionList, MaxCandidateLen, *TII); + pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen, *TII); // Outline each of the candidates and return true if something was outlined. 
return outline(M, CandidateList, FunctionList, Mapper); diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index be39fb22b70..9afd05f99e9 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -52,17 +52,17 @@ using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR #include "AArch64GenInstrInfo.inc" -static cl::opt -TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), - cl::desc("Restrict range of TB[N]Z instructions (DEBUG)")); +static cl::opt TBZDisplacementBits( + "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), + cl::desc("Restrict range of TB[N]Z instructions (DEBUG)")); -static cl::opt -CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), - cl::desc("Restrict range of CB[N]Z instructions (DEBUG)")); +static cl::opt CBZDisplacementBits( + "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), + cl::desc("Restrict range of CB[N]Z instructions (DEBUG)")); static cl::opt -BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), - cl::desc("Restrict range of Bcc instructions (DEBUG)")); + BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), + cl::desc("Restrict range of Bcc instructions (DEBUG)")); AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP), @@ -172,8 +172,8 @@ bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp, return isIntN(Bits, BrOffset / 4); } -MachineBasicBlock *AArch64InstrInfo::getBranchDestBlock( - const MachineInstr &MI) const { +MachineBasicBlock * +AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const { switch (MI.getOpcode()) { default: llvm_unreachable("unexpected opcode!"); @@ -374,12 +374,9 @@ void AArch64InstrInfo::instantiateCondBranch( } } -unsigned AArch64InstrInfo::insertBranch(MachineBasicBlock &MBB, - MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - ArrayRef Cond, - const DebugLoc &DL, - int *BytesAdded) const { +unsigned AArch64InstrInfo::insertBranch( + MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, + ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const { // Shouldn't be a fall through. assert(TBB && "insertBranch must not be told to insert a fallthrough"); @@ -485,10 +482,11 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, return Opc; } -bool AArch64InstrInfo::canInsertSelect( - const MachineBasicBlock &MBB, ArrayRef Cond, - unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles, - int &FalseCycles) const { +bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB, + ArrayRef Cond, + unsigned TrueReg, unsigned FalseReg, + int &CondCycles, int &TrueCycles, + int &FalseCycles) const { // Check register classes. const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const TargetRegisterClass *RC = @@ -656,8 +654,10 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB, MRI.constrainRegClass(FalseReg, RC); // Insert the csel. - BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(TrueReg).addReg(FalseReg).addImm( - CC); + BuildMI(MBB, I, DL, get(Opc), DstReg) + .addReg(TrueReg) + .addReg(FalseReg) + .addImm(CC); } /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx. 
@@ -1078,11 +1078,7 @@ static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) { } } -enum AccessKind { - AK_Write = 0x01, - AK_Read = 0x10, - AK_All = 0x11 -}; +enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 }; /// True when condition flags are accessed (either by writing or reading) /// on the instruction trace starting at From and ending at To. @@ -1111,21 +1107,24 @@ static bool areCFlagsAccessedBetweenInstrs( for (--To; To != From; --To) { const MachineInstr &Instr = *To; - if ( ((AccessToCheck & AK_Write) && Instr.modifiesRegister(AArch64::NZCV, TRI)) || - ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI))) + if (((AccessToCheck & AK_Write) && + Instr.modifiesRegister(AArch64::NZCV, TRI)) || + ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI))) return true; } return false; } /// Try to optimize a compare instruction. A compare instruction is an -/// instruction which produces AArch64::NZCV. It can be truly compare instruction +/// instruction which produces AArch64::NZCV. It can be truly compare +/// instruction /// when there are no uses of its destination register. /// /// The following steps are tried in order: /// 1. Convert CmpInstr into an unconditional version. /// 2. Remove CmpInstr if above there is an instruction producing a needed -/// condition code or an instruction which can be converted into such an instruction. +/// condition code or an instruction which can be converted into such an +/// instruction. /// Only comparison with zero is supported. bool AArch64InstrInfo::optimizeCompareInstr( MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask, @@ -1187,20 +1186,34 @@ static unsigned sForm(MachineInstr &Instr) { case AArch64::SUBSXri: return Instr.getOpcode(); - case AArch64::ADDWrr: return AArch64::ADDSWrr; - case AArch64::ADDWri: return AArch64::ADDSWri; - case AArch64::ADDXrr: return AArch64::ADDSXrr; - case AArch64::ADDXri: return AArch64::ADDSXri; - case AArch64::ADCWr: return AArch64::ADCSWr; - case AArch64::ADCXr: return AArch64::ADCSXr; - case AArch64::SUBWrr: return AArch64::SUBSWrr; - case AArch64::SUBWri: return AArch64::SUBSWri; - case AArch64::SUBXrr: return AArch64::SUBSXrr; - case AArch64::SUBXri: return AArch64::SUBSXri; - case AArch64::SBCWr: return AArch64::SBCSWr; - case AArch64::SBCXr: return AArch64::SBCSXr; - case AArch64::ANDWri: return AArch64::ANDSWri; - case AArch64::ANDXri: return AArch64::ANDSXri; + case AArch64::ADDWrr: + return AArch64::ADDSWrr; + case AArch64::ADDWri: + return AArch64::ADDSWri; + case AArch64::ADDXrr: + return AArch64::ADDSXrr; + case AArch64::ADDXri: + return AArch64::ADDSXri; + case AArch64::ADCWr: + return AArch64::ADCSWr; + case AArch64::ADCXr: + return AArch64::ADCSXr; + case AArch64::SUBWrr: + return AArch64::SUBSWrr; + case AArch64::SUBWri: + return AArch64::SUBSWri; + case AArch64::SUBXrr: + return AArch64::SUBSXrr; + case AArch64::SUBXri: + return AArch64::SUBSXri; + case AArch64::SBCWr: + return AArch64::SBCSWr; + case AArch64::SBCXr: + return AArch64::SBCSXr; + case AArch64::ANDWri: + return AArch64::ANDSWri; + case AArch64::ANDXri: + return AArch64::ANDSXri; } } @@ -1222,7 +1235,7 @@ struct UsedNZCV { UsedNZCV() = default; - UsedNZCV& operator |=(const UsedNZCV& UsedFlags) { + UsedNZCV &operator|=(const UsedNZCV &UsedFlags) { this->N |= UsedFlags.N; this->Z |= UsedFlags.Z; this->C |= UsedFlags.C; @@ -1238,29 +1251,29 @@ struct UsedNZCV { /// codes or we don't optimize CmpInstr in the presence of such instructions. 
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) { switch (Instr.getOpcode()) { - default: - return AArch64CC::Invalid; + default: + return AArch64CC::Invalid; - case AArch64::Bcc: { - int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV); - assert(Idx >= 2); - return static_cast(Instr.getOperand(Idx - 2).getImm()); - } + case AArch64::Bcc: { + int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV); + assert(Idx >= 2); + return static_cast(Instr.getOperand(Idx - 2).getImm()); + } - case AArch64::CSINVWr: - case AArch64::CSINVXr: - case AArch64::CSINCWr: - case AArch64::CSINCXr: - case AArch64::CSELWr: - case AArch64::CSELXr: - case AArch64::CSNEGWr: - case AArch64::CSNEGXr: - case AArch64::FCSELSrrr: - case AArch64::FCSELDrrr: { - int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV); - assert(Idx >= 1); - return static_cast(Instr.getOperand(Idx - 1).getImm()); - } + case AArch64::CSINVWr: + case AArch64::CSINVXr: + case AArch64::CSINCWr: + case AArch64::CSINCXr: + case AArch64::CSELWr: + case AArch64::CSELXr: + case AArch64::CSNEGWr: + case AArch64::CSNEGXr: + case AArch64::FCSELSrrr: + case AArch64::FCSELDrrr: { + int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV); + assert(Idx >= 1); + return static_cast(Instr.getOperand(Idx - 1).getImm()); + } } } @@ -1268,42 +1281,42 @@ static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) { assert(CC != AArch64CC::Invalid); UsedNZCV UsedFlags; switch (CC) { - default: - break; + default: + break; - case AArch64CC::EQ: // Z set - case AArch64CC::NE: // Z clear - UsedFlags.Z = true; - break; + case AArch64CC::EQ: // Z set + case AArch64CC::NE: // Z clear + UsedFlags.Z = true; + break; - case AArch64CC::HI: // Z clear and C set - case AArch64CC::LS: // Z set or C clear - UsedFlags.Z = true; - LLVM_FALLTHROUGH; - case AArch64CC::HS: // C set - case AArch64CC::LO: // C clear - UsedFlags.C = true; - break; + case AArch64CC::HI: // Z clear and C set + case AArch64CC::LS: // Z set or C clear + UsedFlags.Z = true; + LLVM_FALLTHROUGH; + case AArch64CC::HS: // C set + case AArch64CC::LO: // C clear + UsedFlags.C = true; + break; - case AArch64CC::MI: // N set - case AArch64CC::PL: // N clear - UsedFlags.N = true; - break; + case AArch64CC::MI: // N set + case AArch64CC::PL: // N clear + UsedFlags.N = true; + break; - case AArch64CC::VS: // V set - case AArch64CC::VC: // V clear - UsedFlags.V = true; - break; + case AArch64CC::VS: // V set + case AArch64CC::VC: // V clear + UsedFlags.V = true; + break; - case AArch64CC::GT: // Z clear, N and V the same - case AArch64CC::LE: // Z set, N and V differ - UsedFlags.Z = true; - LLVM_FALLTHROUGH; - case AArch64CC::GE: // N and V the same - case AArch64CC::LT: // N and V differ - UsedFlags.N = true; - UsedFlags.V = true; - break; + case AArch64CC::GT: // Z clear, N and V the same + case AArch64CC::LE: // Z set, N and V differ + UsedFlags.Z = true; + LLVM_FALLTHROUGH; + case AArch64CC::GE: // N and V the same + case AArch64CC::LT: // N and V differ + UsedFlags.N = true; + UsedFlags.V = true; + break; } return UsedFlags; } @@ -1328,7 +1341,7 @@ static bool isSUBSRegImm(unsigned Opcode) { /// nor uses of flags between MI and CmpInstr. 
/// - and C/V flags are not used after CmpInstr static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr, - const TargetRegisterInfo *TRI) { + const TargetRegisterInfo *TRI) { assert(MI); assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END); assert(CmpInstr); @@ -1350,7 +1363,8 @@ static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr, return false; UsedNZCV NZCVUsedAfterCmp; - for (auto I = std::next(CmpInstr->getIterator()), E = CmpInstr->getParent()->instr_end(); + for (auto I = std::next(CmpInstr->getIterator()), + E = CmpInstr->getParent()->instr_end(); I != E; ++I) { const MachineInstr &Instr = *I; if (Instr.readsRegister(AArch64::NZCV, TRI)) { @@ -1363,7 +1377,7 @@ static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr, if (Instr.modifiesRegister(AArch64::NZCV, TRI)) break; } - + return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V; } @@ -1421,16 +1435,20 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { .addMemOperand(*MI.memoperands_begin()); } else if (TM.getCodeModel() == CodeModel::Large) { BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg) - .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0); + .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC) + .addImm(0); BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg) .addReg(Reg, RegState::Kill) - .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16); + .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC) + .addImm(16); BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg) .addReg(Reg, RegState::Kill) - .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32); + .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC) + .addImm(32); BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg) .addReg(Reg, RegState::Kill) - .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48); + .addGlobalAddress(GV, 0, AArch64II::MO_G3) + .addImm(48); BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) .addReg(Reg, RegState::Kill) .addImm(0) @@ -1812,7 +1830,7 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth( } else return false; - // Get the scaling factor for the instruction and set the width for the + // Get the scaling factor for the instruction and set the width for the // instruction. unsigned Scale = 0; int64_t Dummy1, Dummy2; @@ -1835,10 +1853,10 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth( return true; } -MachineOperand& +MachineOperand & AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const { assert(LdSt.mayLoadOrStore() && "Expected a memory operation."); - MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands()-1); + MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1); assert(OfsOp.isImm() && "Offset operand wasn't immediate."); return OfsOp; } @@ -1847,7 +1865,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale, unsigned &Width, int64_t &MinOffset, int64_t &MaxOffset) const { switch (Opcode) { - // Not a memory operation or something we want to handle. + // Not a memory operation or something we want to handle. 
default: Scale = Width = 0; MinOffset = MaxOffset = 0; @@ -2102,12 +2120,13 @@ static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, return ((DestReg - SrcReg) & 0x1f) < NumRegs; } -void AArch64InstrInfo::copyPhysRegTuple( - MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, - unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode, - ArrayRef Indices) const { - assert(Subtarget.hasNEON() && - "Unexpected register copy without NEON"); +void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const DebugLoc &DL, unsigned DestReg, + unsigned SrcReg, bool KillSrc, + unsigned Opcode, + ArrayRef Indices) const { + assert(Subtarget.hasNEON() && "Unexpected register copy without NEON"); const TargetRegisterInfo *TRI = &getRegisterInfo(); uint16_t DestEncoding = TRI->getEncodingValue(DestReg); uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg); @@ -2160,8 +2179,9 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); } } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) { - BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg).addImm(0).addImm( - AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); + BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg) + .addImm(0) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); } else { if (Subtarget.hasZeroCycleRegMove()) { // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move. @@ -2196,8 +2216,9 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addImm(0) .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) { - BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg).addImm(0).addImm( - AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); + BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg) + .addImm(0) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); } else { // Otherwise, expand to ORR XZR. BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg) @@ -2210,8 +2231,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copy a DDDD register quad by copying the individual sub-registers. if (AArch64::DDDDRegClass.contains(DestReg) && AArch64::DDDDRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1, - AArch64::dsub2, AArch64::dsub3 }; + static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1, + AArch64::dsub2, AArch64::dsub3}; copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, Indices); return; @@ -2220,8 +2241,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copy a DDD register triple by copying the individual sub-registers. if (AArch64::DDDRegClass.contains(DestReg) && AArch64::DDDRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1, - AArch64::dsub2 }; + static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1, + AArch64::dsub2}; copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, Indices); return; @@ -2230,7 +2251,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copy a DD register pair by copying the individual sub-registers. 
if (AArch64::DDRegClass.contains(DestReg) && AArch64::DDRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 }; + static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1}; copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, Indices); return; @@ -2239,8 +2260,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copy a QQQQ register quad by copying the individual sub-registers. if (AArch64::QQQQRegClass.contains(DestReg) && AArch64::QQQQRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1, - AArch64::qsub2, AArch64::qsub3 }; + static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1, + AArch64::qsub2, AArch64::qsub3}; copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, Indices); return; @@ -2249,8 +2270,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copy a QQQ register triple by copying the individual sub-registers. if (AArch64::QQQRegClass.contains(DestReg) && AArch64::QQQRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1, - AArch64::qsub2 }; + static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1, + AArch64::qsub2}; copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, Indices); return; @@ -2259,7 +2280,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copy a QQ register pair by copying the individual sub-registers. if (AArch64::QQRegClass.contains(DestReg) && AArch64::QQRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 }; + static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1}; copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, Indices); return; @@ -2267,28 +2288,28 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (AArch64::FPR128RegClass.contains(DestReg) && AArch64::FPR128RegClass.contains(SrcReg)) { - if(Subtarget.hasNEON()) { + if (Subtarget.hasNEON()) { BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) .addReg(SrcReg) .addReg(SrcReg, getKillRegState(KillSrc)); } else { BuildMI(MBB, I, DL, get(AArch64::STRQpre)) - .addReg(AArch64::SP, RegState::Define) - .addReg(SrcReg, getKillRegState(KillSrc)) - .addReg(AArch64::SP) - .addImm(-16); + .addReg(AArch64::SP, RegState::Define) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addReg(AArch64::SP) + .addImm(-16); BuildMI(MBB, I, DL, get(AArch64::LDRQpre)) - .addReg(AArch64::SP, RegState::Define) - .addReg(DestReg, RegState::Define) - .addReg(AArch64::SP) - .addImm(16); + .addReg(AArch64::SP, RegState::Define) + .addReg(DestReg, RegState::Define) + .addReg(AArch64::SP) + .addImm(16); } return; } if (AArch64::FPR64RegClass.contains(DestReg) && AArch64::FPR64RegClass.contains(SrcReg)) { - if(Subtarget.hasNEON()) { + if (Subtarget.hasNEON()) { DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub, &AArch64::FPR128RegClass); SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub, @@ -2305,7 +2326,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (AArch64::FPR32RegClass.contains(DestReg) && AArch64::FPR32RegClass.contains(SrcReg)) { - if(Subtarget.hasNEON()) { + if (Subtarget.hasNEON()) { DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub, &AArch64::FPR128RegClass); SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub, @@ -2322,7 +2343,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (AArch64::FPR16RegClass.contains(DestReg) && 
AArch64::FPR16RegClass.contains(SrcReg)) { - if(Subtarget.hasNEON()) { + if (Subtarget.hasNEON()) { DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR128RegClass); SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, @@ -2343,7 +2364,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (AArch64::FPR8RegClass.contains(DestReg) && AArch64::FPR8RegClass.contains(SrcReg)) { - if(Subtarget.hasNEON()) { + if (Subtarget.hasNEON()) { DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR128RegClass); SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, @@ -2392,17 +2413,17 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (DestReg == AArch64::NZCV) { assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy"); BuildMI(MBB, I, DL, get(AArch64::MSR)) - .addImm(AArch64SysReg::NZCV) - .addReg(SrcReg, getKillRegState(KillSrc)) - .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define); + .addImm(AArch64SysReg::NZCV) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define); return; } if (SrcReg == AArch64::NZCV) { assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy"); BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg) - .addImm(AArch64SysReg::NZCV) - .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc)); + .addImm(AArch64SysReg::NZCV) + .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc)); return; } @@ -2458,45 +2479,39 @@ void AArch64InstrInfo::storeRegToStackSlot( if (AArch64::FPR128RegClass.hasSubClassEq(RC)) Opc = AArch64::STRQui; else if (AArch64::DDRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasNEON() && - "Unexpected register store without NEON"); + assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Twov1d; Offset = false; } break; case 24: if (AArch64::DDDRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasNEON() && - "Unexpected register store without NEON"); + assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Threev1d; Offset = false; } break; case 32: if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasNEON() && - "Unexpected register store without NEON"); + assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Fourv1d; Offset = false; } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasNEON() && - "Unexpected register store without NEON"); + assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Twov2d; Offset = false; } break; case 48: if (AArch64::QQQRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasNEON() && - "Unexpected register store without NEON"); + assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Threev2d; Offset = false; } break; case 64: if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasNEON() && - "Unexpected register store without NEON"); + assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Fourv2d; Offset = false; } @@ -2505,8 +2520,8 @@ void AArch64InstrInfo::storeRegToStackSlot( assert(Opc && "Unknown register class"); const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI); + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI); if (Offset) MI.addImm(0); @@ -2562,45 +2577,39 @@ void 
AArch64InstrInfo::loadRegFromStackSlot( if (AArch64::FPR128RegClass.hasSubClassEq(RC)) Opc = AArch64::LDRQui; else if (AArch64::DDRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasNEON() && - "Unexpected register load without NEON"); + assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Twov1d; Offset = false; } break; case 24: if (AArch64::DDDRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasNEON() && - "Unexpected register load without NEON"); + assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Threev1d; Offset = false; } break; case 32: if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasNEON() && - "Unexpected register load without NEON"); + assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Fourv1d; Offset = false; } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasNEON() && - "Unexpected register load without NEON"); + assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Twov2d; Offset = false; } break; case 48: if (AArch64::QQQRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasNEON() && - "Unexpected register load without NEON"); + assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Threev2d; Offset = false; } break; case 64: if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasNEON() && - "Unexpected register load without NEON"); + assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Fourv2d; Offset = false; } @@ -2609,8 +2618,8 @@ void AArch64InstrInfo::loadRegFromStackSlot( assert(Opc && "Unknown register class"); const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc)) - .addReg(DestReg, getDefRegState(true)) - .addFrameIndex(FI); + .addReg(DestReg, getDefRegState(true)) + .addFrameIndex(FI); if (Offset) MI.addImm(0); MI.addMemOperand(MMO); @@ -2755,7 +2764,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) { assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) == - TRI.getRegSizeInBits(*getRegClass(SrcReg)) && + TRI.getRegSizeInBits(*getRegClass(SrcReg)) && "Mismatched register size in non subreg COPY"); if (IsSpill) storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex, @@ -3138,10 +3147,7 @@ void AArch64InstrInfo::getNoop(MCInst &NopInst) const { } // AArch64 supports MachineCombiner. -bool AArch64InstrInfo::useMachineCombiner() const { - - return true; -} +bool AArch64InstrInfo::useMachineCombiner() const { return true; } // True when Opc sets flag static bool isCombineInstrSettingFlag(unsigned Opc) { @@ -3275,7 +3281,8 @@ static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, // 1. Other data types (integer, vectors) // 2. Other math / logic operations (xor, or) // 3. Other forms of the same operation (intrinsics and other variants) -bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { +bool AArch64InstrInfo::isAssociativeAndCommutative( + const MachineInstr &Inst) const { switch (Inst.getOpcode()) { case AArch64::FADDDrr: case AArch64::FADDSrr: @@ -3595,8 +3602,8 @@ static bool getFMAPatterns(MachineInstr &Root, /// Return true when a code sequence can improve throughput. It /// should be called only for instructions in loops. 
/// \param Pattern - combiner pattern -bool -AArch64InstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const { +bool AArch64InstrInfo::isThroughputPattern( + MachineCombinerPattern Pattern) const { switch (Pattern) { default: break; @@ -3747,8 +3754,8 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl &InsInstrs, - unsigned IdxMulOpd, unsigned MaddOpc, - unsigned VR, const TargetRegisterClass *RC) { + unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, + const TargetRegisterClass *RC) { assert(IdxMulOpd == 1 || IdxMulOpd == 2); MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg()); @@ -3767,11 +3774,11 @@ static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, if (TargetRegisterInfo::isVirtualRegister(VR)) MRI.constrainRegClass(VR, RC); - MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), - ResultReg) - .addReg(SrcReg0, getKillRegState(Src0IsKill)) - .addReg(SrcReg1, getKillRegState(Src1IsKill)) - .addReg(VR); + MachineInstrBuilder MIB = + BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) + .addReg(SrcReg0, getKillRegState(Src0IsKill)) + .addReg(SrcReg1, getKillRegState(Src1IsKill)) + .addReg(VR); // Insert the MADD InsInstrs.push_back(MIB); return MUL; @@ -4401,12 +4408,9 @@ AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const { using namespace AArch64II; static const std::pair TargetFlags[] = { - {MO_PAGE, "aarch64-page"}, - {MO_PAGEOFF, "aarch64-pageoff"}, - {MO_G3, "aarch64-g3"}, - {MO_G2, "aarch64-g2"}, - {MO_G1, "aarch64-g1"}, - {MO_G0, "aarch64-g0"}, + {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"}, + {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"}, + {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"}, {MO_HI12, "aarch64-hi12"}}; return makeArrayRef(TargetFlags); } @@ -4416,9 +4420,7 @@ AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { using namespace AArch64II; static const std::pair TargetFlags[] = { - {MO_GOT, "aarch64-got"}, - {MO_NC, "aarch64-nc"}, - {MO_TLS, "aarch64-tls"}}; + {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, {MO_TLS, "aarch64-tls"}}; return makeArrayRef(TargetFlags); } @@ -4430,26 +4432,27 @@ AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const { return makeArrayRef(TargetFlags); } -unsigned AArch64InstrInfo::getOutliningBenefit(size_t SequenceSize, - size_t Occurrences, - bool CanBeTailCall) const { - unsigned NotOutlinedSize = SequenceSize * Occurrences; - unsigned OutlinedSize; - - // Is this candidate something we can outline as a tail call? - if (CanBeTailCall) { - // If yes, then we just outline the sequence and replace each of its - // occurrences with a branch instruction. - OutlinedSize = SequenceSize + Occurrences; - } else { - // If no, then we outline the sequence (SequenceSize), add a return (+1), - // and replace each occurrence with a save/restore to LR and a call - // (3 * Occurrences) - OutlinedSize = (SequenceSize + 1) + (3 * Occurrences); - } +size_t AArch64InstrInfo::getOutliningCallOverhead( + MachineBasicBlock::iterator &StartIt, + MachineBasicBlock::iterator &EndIt) const { + // Is this a tail-call? + if (EndIt->isTerminator()) + return 1; // Yes, so we don't need to save/restore LR. - // Return the number of instructions saved by outlining this sequence. - return NotOutlinedSize > OutlinedSize ? 
NotOutlinedSize - OutlinedSize : 0; + // No, so save + restore LR. + return 3; +} + +size_t AArch64InstrInfo::getOutliningFrameOverhead( + MachineBasicBlock::iterator &StartIt, + MachineBasicBlock::iterator &EndIt) const { + + // Is this a tail-call? + if (EndIt->isTerminator()) + return 0; // Yes, so we already have a return. + + // No, so we have to add a return to the end. + return 1; } bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const { @@ -4475,7 +4478,7 @@ AArch64InstrInfo::getOutliningType(MachineInstr &MI) const { // Is this the end of a function? if (MI.getParent()->succ_empty()) - return MachineOutlinerInstrType::Legal; + return MachineOutlinerInstrType::Legal; // It's not, so don't outline it. return MachineOutlinerInstrType::Illegal; @@ -4494,7 +4497,7 @@ AArch64InstrInfo::getOutliningType(MachineInstr &MI) const { // Don't outline anything that uses the link register. if (MI.modifiesRegister(AArch64::LR, &RI) || MI.readsRegister(AArch64::LR, &RI)) - return MachineOutlinerInstrType::Illegal; + return MachineOutlinerInstrType::Illegal; // Does this use the stack? if (MI.modifiesRegister(AArch64::SP, &RI) || @@ -4502,13 +4505,13 @@ AArch64InstrInfo::getOutliningType(MachineInstr &MI) const { // Is it a memory operation? if (MI.mayLoadOrStore()) { - unsigned Base; // Filled with the base regiser of MI. + unsigned Base; // Filled with the base regiser of MI. int64_t Offset; // Filled with the offset of MI. unsigned DummyWidth; // Does it allow us to offset the base register and is the base SP? if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) || - Base != AArch64::SP) + Base != AArch64::SP) return MachineOutlinerInstrType::Illegal; // Find the minimum/maximum offset for this instruction and check if @@ -4522,7 +4525,7 @@ AArch64InstrInfo::getOutliningType(MachineInstr &MI) const { // This is tricky to test with IR tests, but when the outliner is moved // to a MIR test, it really ought to be checked. if (Offset + 16 < MinOffset || Offset + 16 > MaxOffset) - return MachineOutlinerInstrType::Illegal; + return MachineOutlinerInstrType::Illegal; // It's in range, so we can outline it. return MachineOutlinerInstrType::Legal; @@ -4558,7 +4561,7 @@ void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const { // We've pushed the return address to the stack, so add 16 to the offset. // This is safe, since we already checked if it would overflow when we // checked if this instruction was legal to outline. 
- int64_t NewImm = (Offset + 16)/Scale; + int64_t NewImm = (Offset + 16) / Scale; StackOffsetOperand.setImm(NewImm); } } @@ -4624,4 +4627,3 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall( return It; } - diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h index 64f9743ab94..076a32f911f 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.h +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -299,8 +299,10 @@ public: getSerializableMachineMemOperandTargetFlags() const override; bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override; - unsigned getOutliningBenefit(size_t SequenceSize, size_t Occurrences, - bool CanBeTailCall) const override; + size_t getOutliningCallOverhead(MachineBasicBlock::iterator &StartIt, + MachineBasicBlock::iterator &EndIt) const override; + size_t getOutliningFrameOverhead(MachineBasicBlock::iterator &StartIt, + MachineBasicBlock::iterator &EndIt) const override; AArch64GenInstrInfo::MachineOutlinerInstrType getOutliningType(MachineInstr &MI) const override; void insertOutlinerEpilogue(MachineBasicBlock &MBB, diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index f01025545e7..8eb1536790d 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -10537,25 +10537,22 @@ char LDTLSCleanup::ID = 0; FunctionPass* llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); } -unsigned X86InstrInfo::getOutliningBenefit(size_t SequenceSize, - size_t Occurrences, - bool CanBeTailCall) const { - unsigned NotOutlinedSize = SequenceSize * Occurrences; - unsigned OutlinedSize; +size_t X86InstrInfo::getOutliningCallOverhead( +MachineBasicBlock::iterator &StartIt, +MachineBasicBlock::iterator &EndIt) const { + // We just have to emit a call, so return 1. + return 1; +} - // Is it a tail call? - if (CanBeTailCall) { - // If yes, we don't have to include a return instruction-- it's already in - // our sequence. So we have one occurrence of the sequence + #Occurrences - // calls. - OutlinedSize = SequenceSize + Occurrences; - } else { - // If not, add one for the return instruction. - OutlinedSize = (SequenceSize + 1) + Occurrences; - } +size_t X86InstrInfo::getOutliningFrameOverhead( +MachineBasicBlock::iterator &StartIt, +MachineBasicBlock::iterator &EndIt) const { + // Is this a tail-call? + if (EndIt->isTerminator()) + return 0; // Yes, so we already have a return. - // Return the number of instructions saved by outlining this sequence. - return NotOutlinedSize > OutlinedSize ? NotOutlinedSize - OutlinedSize : 0; + // No, so we have to add a return to the end. + return 1; } bool X86InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const { diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index e64876073cc..38caf04f7f8 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -566,9 +566,11 @@ public: ArrayRef> getSerializableDirectMachineOperandTargetFlags() const override; - unsigned getOutliningBenefit(size_t SequenceSize, - size_t Occurrences, - bool CanBeTailCall) const override; + size_t getOutliningCallOverhead(MachineBasicBlock::iterator &StartIt, + MachineBasicBlock::iterator &EndIt) const override; + + size_t getOutliningFrameOverhead(MachineBasicBlock::iterator &StartIt, + MachineBasicBlock::iterator &EndIt) const override; bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override;
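
For reference, the benefit computation that MachineOutliner::findCandidates performs with these hooks reduces to a single comparison: a candidate is profitable only when the instructions saved across all occurrences of a sequence exceed the per-occurrence call overhead plus the one-time frame overhead plus the one outlined copy of the sequence itself. The standalone C++ sketch below mirrors that arithmetic from the patch; the concrete numbers (a 6-instruction sequence seen 3 times, 3 instructions of call overhead and 1 of frame overhead, matching the AArch64 non-tail-call case above) are illustrative assumptions, not values taken from a real module.

#include <cstddef>
#include <cstdio>

int main() {
  // Candidate: a StringLen-instruction sequence that appears OccurrenceCount times.
  std::size_t StringLen = 6;
  std::size_t OccurrenceCount = 3;

  // Per-occurrence call overhead and one-time frame overhead, as a target's
  // getOutliningCallOverhead/getOutliningFrameOverhead would report them.
  // 3 and 1 are the AArch64 numbers for the non-tail-call case; a tail call
  // would report 1 and 0.
  std::size_t CallOverheadPerOccurrence = 3;
  std::size_t FrameOverhead = 1;

  std::size_t SequenceOverhead = StringLen; // One copy of the sequence is still emitted.
  std::size_t CallOverhead = CallOverheadPerOccurrence * OccurrenceCount;

  std::size_t OutliningCost = CallOverhead + FrameOverhead + SequenceOverhead;
  std::size_t NotOutliningCost = SequenceOverhead * OccurrenceCount;

  if (NotOutliningCost <= OutliningCost)
    std::printf("Not beneficial to outline.\n");
  else
    std::printf("Benefit: %zu instructions saved.\n",
                NotOutliningCost - OutliningCost);
  return 0;
}

With these numbers the sequence costs 18 instructions if left inline versus 16 if outlined, a benefit of 2; with the tail-call overheads (1 per call, 0 for the frame) the same sequence would save 9.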