//===----------------------------------------------------------------------===//
#include "Views/BottleneckAnalysis.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/MCA/Support.h"
#include "llvm/Support/Format.h"
#define DEBUG_TYPE "llvm-mca"
-BottleneckAnalysis::BottleneckAnalysis(const MCSchedModel &Model)
- : SM(Model), TotalCycles(0), BPI({0, 0, 0, 0, 0}),
+/// Builds the lookup tables of the tracker from the scheduling model \p Model:
+/// the processor resource masks, the map from resource state indices to
+/// processor resource identifiers, and the position of the first unit of every
+/// processor resource inside ResourceUsers.
+PressureTracker::PressureTracker(const MCSchedModel &Model)
+ : SM(Model),
ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0),
- ProcResourceMasks(Model.getNumProcResourceKinds()),
+ ProcResID2Mask(Model.getNumProcResourceKinds(), 0),
ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0),
- PressureIncreasedBecauseOfResources(false),
- PressureIncreasedBecauseOfDataDependencies(false),
- SeenStallCycles(false) {
- computeProcResourceMasks(SM, ProcResourceMasks);
- for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
- unsigned Index = getResourceStateIndex(ProcResourceMasks[I]);
- ResIdx2ProcResID[Index] = I;
+ ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) {
+ computeProcResourceMasks(SM, ProcResID2Mask);
+
+ // Ignore the invalid resource at index zero.
+ unsigned NextResourceUsersIdx = 0;
+ for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
+ // The units of resource I take NumUnits consecutive slots of ResourceUsers,
+ // starting at the running offset.
+ ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx;
+ NextResourceUsersIdx += ProcResource.NumUnits;
+ uint64_t ResourceMask = ProcResID2Mask[I];
+ ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I;
+ }
+
+ // One slot per processor resource unit. ~0U marks a unit for which no user
+ // has been recorded yet.
+ ResourceUsers.resize(NextResourceUsersIdx);
+ std::fill(ResourceUsers.begin(), ResourceUsers.end(), ~0U);
+}
+
+/// Collects into \p UniqueUsers the identifiers of the instructions that last
+/// used a unit of the processor resource identified by \p ResourceMask.
+///
+/// Identifiers already present in \p UniqueUsers are not added a second time.
+/// A unit that has never been written by an instruction issued event still
+/// holds the all-ones marker; that marker is not an instruction identifier, so
+/// it is skipped instead of being reported to the caller.
+void PressureTracker::getUniqueUsers(
+    uint64_t ResourceMask, SmallVectorImpl<unsigned> &UniqueUsers) const {
+  unsigned Index = getResourceStateIndex(ResourceMask);
+  unsigned ProcResID = ResIdx2ProcResID[Index];
+  const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
+  for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) {
+    unsigned From = getResourceUser(ProcResID, I);
+    // ResourceUsers is pre-filled with ~0U; such an entry has no real user.
+    if (From == ~0U)
+      continue;
+    if (find(UniqueUsers, From) == UniqueUsers.end())
+      UniqueUsers.emplace_back(From);
+  }
+}
+
+/// Keeps the tracker in sync with the life cycle of an instruction.
+///
+/// A dispatched instruction gets a fresh pressure record, an executed one
+/// loses its record, and an issued one becomes the last known user of every
+/// resource unit that it consumed. Any other event type is ignored.
+void PressureTracker::handleInstructionEvent(const HWInstructionEvent &Event) {
+  const unsigned SourceIdx = Event.IR.getSourceIndex();
+
+  if (Event.Type == HWInstructionEvent::Dispatched) {
+    IPI.insert(std::make_pair(SourceIdx, InstructionPressureInfo()));
+    return;
+  }
+
+  if (Event.Type == HWInstructionEvent::Executed) {
+    IPI.erase(SourceIdx);
+    return;
+  }
+
+  if (Event.Type != HWInstructionEvent::Issued)
+    return;
+
+  const auto &Issued = static_cast<const HWInstructionIssuedEvent &>(Event);
+  for (const auto &Use : Issued.UsedResources) {
+    // Use.first is a (processor resource ID, unit mask) pair; the position of
+    // the lowest set bit of the unit mask selects the unit slot.
+    const auto &Ref = Use.first;
+    const unsigned Slot =
+        ProcResID2ResourceUsersIndex[Ref.first] + countTrailingZeros(Ref.second);
+    ResourceUsers[Slot] = SourceIdx;
+  }
+}
+
+/// Bumps the pressure counters of the resources described by
+/// \p CumulativeMask, visiting its set bits from the lowest to the highest.
+///
+/// When the mask of the resource that owns a bit is exactly that bit, the
+/// counter of that resource is incremented. Otherwise the counter of every
+/// resource identified by one of the other bits of the owner's mask is
+/// incremented (presumably the units of a resource group).
+void PressureTracker::updateResourcePressureDistribution(
+    uint64_t CumulativeMask) {
+  while (CumulativeMask) {
+    // Isolate the lowest set bit and consume it right away.
+    const uint64_t LowestBit = CumulativeMask & (-CumulativeMask);
+    CumulativeMask ^= LowestBit;
+
+    const unsigned OwnerID = ResIdx2ProcResID[getResourceStateIndex(LowestBit)];
+    const uint64_t OwnerMask = ProcResID2Mask[OwnerID];
+    if (OwnerMask == LowestBit) {
+      ++ResourcePressureDistribution[OwnerID];
+      continue;
+    }
+
+    // Walk the remaining bits of the owner's mask, one at a time.
+    for (uint64_t Others = OwnerMask ^ LowestBit; Others;) {
+      const uint64_t UnitBit = Others & (-Others);
+      Others ^= UnitBit;
+      const unsigned UnitID = ResIdx2ProcResID[getResourceStateIndex(UnitBit)];
+      ++ResourcePressureDistribution[UnitID];
+    }
+  }
+}
+
+/// Updates the per-instruction pressure counters according to \p Event.
+///
+/// For a resource pressure event the resource pressure distribution is updated
+/// too, and only the affected instructions whose critical resource mask
+/// intersects the event mask are charged a resource pressure cycle. Register
+/// and memory dependency events charge every affected instruction.
+void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) {
+  assert(Event.Reason != HWPressureEvent::INVALID &&
+         "Unexpected invalid event!");
+
+  switch (Event.Reason) {
+  default:
+    break;
+
+  case HWPressureEvent::RESOURCES: {
+    const uint64_t ResourceMask = Event.ResourceMask;
+    updateResourcePressureDistribution(Event.ResourceMask);
+
+    for (const InstRef &IR : Event.AffectedInstructions) {
+      const Instruction &IS = *IR.getInstruction();
+      // Resource masks are 64 bits wide. Keep the intersection in a uint64_t
+      // so that resources mapped to the upper 32 bits are not dropped.
+      const uint64_t BusyResources =
+          IS.getCriticalResourceMask() & ResourceMask;
+      if (!BusyResources)
+        continue;
+
+      IPI[IR.getSourceIndex()].ResourcePressureCycles++;
+    }
+    break;
+  }
+
+  case HWPressureEvent::REGISTER_DEPS:
+    for (const InstRef &IR : Event.AffectedInstructions) {
+      unsigned IID = IR.getSourceIndex();
+      IPI[IID].RegisterPressureCycles++;
+    }
+    break;
+
+  case HWPressureEvent::MEMORY_DEPS:
+    for (const InstRef &IR : Event.AffectedInstructions) {
+      unsigned IID = IR.getSourceIndex();
+      IPI[IID].MemoryPressureCycles++;
+    }
+    break;
+  }
+}
+
+#ifndef NDEBUG
+/// Prints every register dependency edge of the graph to \p OS. Register
+/// names are printed through \p MCIP.
+void DependencyGraph::dumpRegDeps(raw_ostream &OS, MCInstPrinter &MCIP) const {
+  OS << "\nREG DEPS\n";
+  unsigned From = 0;
+  for (const DGNode &Node : Nodes) {
+    for (const DependencyEdge &Edge : Node.RegDeps) {
+      // An edge that does not point to a later node is tagged as loop carried.
+      const char *Tag = From >= Edge.IID ? " (loop carried)" : " ";
+      OS << " FROM: " << From << " TO: " << Edge.IID << Tag
+         << " - REGISTER: ";
+      MCIP.printRegName(OS, Edge.ResourceOrRegID);
+      OS << " - CYCLES: " << Edge.Cycles << '\n';
+    }
+    ++From;
+  }
+}
+
+/// Prints every memory dependency edge of the graph to \p OS.
+void DependencyGraph::dumpMemDeps(raw_ostream &OS) const {
+  OS << "\nMEM DEPS\n";
+  unsigned From = 0;
+  for (const DGNode &Node : Nodes) {
+    for (const DependencyEdge &Edge : Node.MemDeps) {
+      // An edge that does not point to a later node is tagged as loop carried.
+      const char *Tag = From >= Edge.IID ? " (loop carried)" : " ";
+      OS << " FROM: " << From << " TO: " << Edge.IID << Tag
+         << " - MEMORY - CYCLES: " << Edge.Cycles << '\n';
+    }
+    ++From;
+  }
+}
+
+/// Prints every resource dependency edge of the graph to \p OS.
+///
+/// The "loop carried" tag carries a leading space, like in the register and
+/// memory dumps, so that it is not glued to the destination identifier.
+void DependencyGraph::dumpResDeps(raw_ostream &OS) const {
+  OS << "\nRESOURCE DEPS\n";
+  for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
+    const DGNode &Node = Nodes[I];
+    for (const DependencyEdge &DE : Node.ResDeps) {
+      // An edge that does not point to a later node is tagged as loop carried.
+      bool LoopCarried = I >= DE.IID;
+      OS << " FROM: " << I << " TO: " << DE.IID
+         << (LoopCarried ? " (loop carried)" : " ")
+         << " - RESOURCE MASK: " << DE.ResourceOrRegID;
+      OS << " - CYCLES: " << DE.Cycles << '\n';
+    }
+  }
+}
+#endif // NDEBUG
+
+/// Records the dependency \p Dep in the edge list \p Vec.
+///
+/// If \p Vec already holds an edge with the same destination and the same
+/// resource/register identifier, the cycles of \p Dep are added to that edge.
+/// Otherwise \p Dep is appended as a new edge and the predecessor count of its
+/// destination node is incremented.
+void DependencyGraph::addDepImpl(SmallVectorImpl<DependencyEdge> &Vec,
+                                 DependencyEdge &&Dep) {
+  for (DependencyEdge &Existing : Vec) {
+    if (Existing.IID != Dep.IID ||
+        Existing.ResourceOrRegID != Dep.ResourceOrRegID)
+      continue;
+
+    // Same edge seen again: only accumulate the cycles.
+    Existing.Cycles += Dep.Cycles;
+    return;
+  }
+
+  Vec.push_back(Dep);
+  ++Nodes[Dep.IID].NumPredecessors;
+}
+
+/// Creates the view for the instruction sequence \p Sequence.
+///
+/// The pressure tracker is built from the scheduling model obtained through
+/// the STI member, and the dependency graph gets one node per instruction of
+/// \p Sequence. \p Executions is stored in Iterations. The cycle counter starts
+/// at zero and all the pressure/stall flags start cleared.
+BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti,
+ MCInstPrinter &Printer,
+ ArrayRef<MCInst> Sequence,
+ unsigned Executions)
+ : STI(sti), MCIP(Printer), Tracker(STI.getSchedModel()),
+ DG(Sequence.size()), Source(Sequence), Iterations(Executions),
+ TotalCycles(0), PressureIncreasedBecauseOfResources(false),
+ PressureIncreasedBecauseOfRegisterDependencies(false),
+ PressureIncreasedBecauseOfMemoryDependencies(false),
+ SeenStallCycles(false), BPI() {}
+
+/// Forwards \p Event to the pressure tracker and, when an instruction is
+/// issued, turns the register/memory pressure cycles accumulated for it into
+/// edges of the dependency graph. Node indices are source indices modulo the
+/// number of instructions in the sequence.
+void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) {
+  Tracker.handleInstructionEvent(Event);
+  if (Event.Type != HWInstructionEvent::Issued)
+    return;
+
+  const unsigned SourceIdx = Event.IR.getSourceIndex();
+  const Instruction &Inst = *Event.IR.getInstruction();
+  const unsigned To = SourceIdx % Source.size();
+
+  // Register dependency: the edge starts at the critical register writer.
+  if (unsigned RegCycles = Tracker.getRegisterPressureCycles(SourceIdx)) {
+    const CriticalDependency &RegDep = Inst.getCriticalRegDep();
+    DG.addRegDep(RegDep.IID % Source.size(), To, RegDep.RegID, RegCycles);
+  }
+
+  // Memory dependency: the edge starts at the critical memory predecessor.
+  if (unsigned MemCycles = Tracker.getMemoryPressureCycles(SourceIdx)) {
+    const CriticalDependency &MemDep = Inst.getCriticalMemDep();
+    DG.addMemDep(MemDep.IID % Source.size(), To, MemCycles);
}
}
assert(Event.Reason != HWPressureEvent::INVALID &&
"Unexpected invalid event!");
+ Tracker.handlePressureEvent(Event);
+
switch (Event.Reason) {
default:
break;
case HWPressureEvent::RESOURCES: {
PressureIncreasedBecauseOfResources = true;
- ++BPI.ResourcePressureCycles;
- uint64_t ResourceMask = Event.ResourceMask;
- while (ResourceMask) {
- uint64_t Current = ResourceMask & (-ResourceMask);
- unsigned Index = getResourceStateIndex(Current);
- unsigned ProcResID = ResIdx2ProcResID[Index];
- const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
- if (!PRDesc.SubUnitsIdxBegin) {
- ResourcePressureDistribution[Index]++;
- ResourceMask ^= Current;
- continue;
- }
- for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) {
- unsigned OtherProcResID = PRDesc.SubUnitsIdxBegin[I];
- unsigned OtherMask = ProcResourceMasks[OtherProcResID];
- ResourcePressureDistribution[getResourceStateIndex(OtherMask)]++;
+ SmallVector<unsigned, 4> UniqueUsers;
+ for (const InstRef &IR : Event.AffectedInstructions) {
+ const Instruction &IS = *IR.getInstruction();
+ unsigned To = IR.getSourceIndex() % Source.size();
+ unsigned BusyResources =
+ IS.getCriticalResourceMask() & Event.ResourceMask;
+ while (BusyResources) {
+ uint64_t Current = BusyResources & (-BusyResources);
+ Tracker.getUniqueUsers(Current, UniqueUsers);
+ for (unsigned User : UniqueUsers)
+ DG.addResourceDep(User % Source.size(), To, Current, 1);
+ BusyResources ^= Current;
}
-
- ResourceMask ^= Current;
+ UniqueUsers.clear();
}
+
break;
}
case HWPressureEvent::REGISTER_DEPS:
- PressureIncreasedBecauseOfDataDependencies = true;
- ++BPI.RegisterDependencyCycles;
+ PressureIncreasedBecauseOfRegisterDependencies = true;
break;
case HWPressureEvent::MEMORY_DEPS:
- PressureIncreasedBecauseOfDataDependencies = true;
- ++BPI.MemoryDependencyCycles;
+ PressureIncreasedBecauseOfMemoryDependencies = true;
break;
}
}
+/// Closes the current cycle: counts it, attributes it to the kinds of pressure
+/// increase that were notified during the cycle, and clears the per-cycle
+/// flags. A cycle without any pressure increase only bumps the cycle counter.
+void BottleneckAnalysis::onCycleEnd() {
+  ++TotalCycles;
+
+  const bool SawResources = PressureIncreasedBecauseOfResources;
+  const bool SawRegDeps = PressureIncreasedBecauseOfRegisterDependencies;
+  const bool SawMemDeps = PressureIncreasedBecauseOfMemoryDependencies;
+  if (!(SawResources || SawRegDeps || SawMemDeps))
+    return;
+
+  ++BPI.PressureIncreaseCycles;
+  if (SawResources)
+    ++BPI.ResourcePressureCycles;
+
+  // A data dependency cycle is a cycle with a register or a memory dependency.
+  if (SawRegDeps || SawMemDeps) {
+    ++BPI.DataDependencyCycles;
+    if (SawRegDeps)
+      ++BPI.RegisterDependencyCycles;
+    if (SawMemDeps)
+      ++BPI.MemoryDependencyCycles;
+  }
+
+  PressureIncreasedBecauseOfResources = false;
+  PressureIncreasedBecauseOfRegisterDependencies = false;
+  PressureIncreasedBecauseOfMemoryDependencies = false;
+}
+
void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
OS << "\nNo resource or data dependency bottlenecks discovered.\n";
<< "% ]";
if (BPI.PressureIncreaseCycles) {
- for (unsigned I = 0, E = ResourcePressureDistribution.size(); I < E; ++I) {
- if (ResourcePressureDistribution[I]) {
- double Frequency =
- (double)ResourcePressureDistribution[I] * 100 / TotalCycles;
- unsigned Index = ResIdx2ProcResID[getResourceStateIndex(1ULL << I)];
- const MCProcResourceDesc &PRDesc = *SM.getProcResource(Index);
+ ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution();
+ const MCSchedModel &SM = STI.getSchedModel();
+ for (unsigned I = 0, E = Distribution.size(); I < E; ++I) {
+ unsigned ResourceCycles = Distribution[I];
+ if (ResourceCycles) {
+ double Frequency = (double)ResourceCycles * 100 / TotalCycles;
+ const MCProcResourceDesc &PRDesc = *SM.getProcResource(I);
OS << "\n - " << PRDesc.Name << " [ "
<< format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]";
}
OS << "\n Data Dependencies: [ "
<< format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]";
-
OS << "\n - Register Dependencies [ "
<< format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100)
<< "% ]";
-
OS << "\n - Memory Dependencies [ "
<< format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100)
<< "% ]\n\n";
printBottleneckHints(TempStream);
TempStream.flush();
OS << Buffer;
+ LLVM_DEBUG(DG.dump(OS, MCIP));
}
+
} // namespace mca.
} // namespace llvm
/// \file
///
/// This file implements the bottleneck analysis view.
-///
+///
/// This view internally observes backend pressure increase events in order to
/// identify potential sources of bottlenecks.
-///
+///
/// Example of bottleneck analysis report:
///
/// Cycles with backend pressure increase [ 33.40% ]
#include "Views/View.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCSchedule.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
namespace mca {
+/// Tracks backend pressure information: the resource pressure distribution,
+/// the last user of every processor resource unit, and per-instruction
+/// pressure cycles. An instruction is tracked from its dispatched event until
+/// its executed event.
+class PressureTracker {
+ const MCSchedModel &SM;
+
+ // Resource pressure distribution. There is an element for every processor
+ // resource declared by the scheduling model. Quantities are number of cycles.
+ SmallVector<unsigned, 4> ResourcePressureDistribution;
+
+ // Each processor resource is associated with a so-called processor resource
+ // mask. This vector is used to correlate processor resource IDs with
+ // processor resource masks. There is exactly one element for each processor
+ // resource declared by the scheduling model.
+ SmallVector<uint64_t, 4> ProcResID2Mask;
+
+ // Maps processor resource state indices (returned by calls to
+ // `getResourceStateIndex(Mask)`) to processor resource identifiers.
+ SmallVector<unsigned, 4> ResIdx2ProcResID;
+
+ // Maps Processor Resource identifiers to ResourceUsers indices.
+ SmallVector<unsigned, 4> ProcResID2ResourceUsersIndex;
+
+ // Identifies the last user of a processor resource unit.
+ // This vector is updated on every instruction issued event.
+ // There is one entry for every processor resource unit declared by the
+ // processor model. An all_ones value is treated like an invalid instruction
+ // identifier.
+ SmallVector<unsigned, 4> ResourceUsers;
+
+ // Pressure cycles accumulated by a tracked instruction. Each counter is
+ // incremented by pressure events of the matching kind that affect the
+ // instruction.
+ struct InstructionPressureInfo {
+ unsigned RegisterPressureCycles;
+ unsigned MemoryPressureCycles;
+ unsigned ResourcePressureCycles;
+ };
+ // Pressure information of the tracked instructions, keyed by source index.
+ DenseMap<unsigned, InstructionPressureInfo> IPI;
+
+ void updateResourcePressureDistribution(uint64_t CumulativeMask);
+
+ // Returns the last user recorded for unit `UnitID` of resource `ProcResID`.
+ unsigned getResourceUser(unsigned ProcResID, unsigned UnitID) const {
+ unsigned Index = ProcResID2ResourceUsersIndex[ProcResID];
+ return ResourceUsers[Index + UnitID];
+ }
+
+public:
+ PressureTracker(const MCSchedModel &Model);
+
+ // Returns the resource pressure distribution, indexed by processor resource
+ // identifier.
+ ArrayRef<unsigned> getResourcePressureDistribution() const {
+ return ResourcePressureDistribution;
+ }
+
+ // Appends to `Users` the last users of the units of the resource identified
+ // by `ResourceMask`, without adding a user that is already in `Users`.
+ void getUniqueUsers(uint64_t ResourceMask,
+ SmallVectorImpl<unsigned> &Users) const;
+
+ // The getters below expect `IID` to identify a tracked instruction.
+ unsigned getRegisterPressureCycles(unsigned IID) const {
+ assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!");
+ const InstructionPressureInfo &Info = IPI.find(IID)->second;
+ return Info.RegisterPressureCycles;
+ }
+
+ unsigned getMemoryPressureCycles(unsigned IID) const {
+ assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!");
+ const InstructionPressureInfo &Info = IPI.find(IID)->second;
+ return Info.MemoryPressureCycles;
+ }
+
+ unsigned getResourcePressureCycles(unsigned IID) const {
+ assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!");
+ const InstructionPressureInfo &Info = IPI.find(IID)->second;
+ return Info.ResourcePressureCycles;
+ }
+
+ void handlePressureEvent(const HWPressureEvent &Event);
+ void handleInstructionEvent(const HWInstructionEvent &Event);
+};
+
+/// A graph with one node per instruction, whose edges describe register,
+/// memory and resource dependencies between instructions. The graph cannot be
+/// copied.
+class DependencyGraph {
+ // An outgoing edge of a node.
+ struct DependencyEdge {
+ // Index of the destination node.
+ unsigned IID;
+ // Register identifier for a register dependency, resource mask for a
+ // resource dependency, unused (zero) for a memory dependency.
+ uint64_t ResourceOrRegID;
+ // Cycles accumulated over all the times this edge was added.
+ uint64_t Cycles;
+ };
+
+ struct DGNode {
+ // Number of distinct edges that have this node as destination.
+ unsigned NumPredecessors;
+ SmallVector<DependencyEdge, 8> RegDeps;
+ SmallVector<DependencyEdge, 8> MemDeps;
+ SmallVector<DependencyEdge, 8> ResDeps;
+ };
+ SmallVector<DGNode, 16> Nodes;
+
+ void addDepImpl(SmallVectorImpl<DependencyEdge> &Vec, DependencyEdge &&DE);
+
+ DependencyGraph(const DependencyGraph &) = delete;
+ DependencyGraph &operator=(const DependencyGraph &) = delete;
+
+public:
+ // Creates a graph with `NumNodes` nodes and no edges.
+ DependencyGraph(unsigned NumNodes) : Nodes(NumNodes, DGNode()) {}
+
+ // Each of the methods below adds an edge from node `From` to node `To` that
+ // accounts for `Cy` cycles.
+ void addRegDep(unsigned From, unsigned To, unsigned RegID, unsigned Cy) {
+ addDepImpl(Nodes[From].RegDeps, {To, RegID, Cy});
+ }
+ void addMemDep(unsigned From, unsigned To, unsigned Cy) {
+ addDepImpl(Nodes[From].MemDeps, {To, /* unused */ 0, Cy});
+ }
+ void addResourceDep(unsigned From, unsigned To, uint64_t Mask, unsigned Cy) {
+ addDepImpl(Nodes[From].ResDeps, {To, Mask, Cy});
+ }
+
+#ifndef NDEBUG
+ void dumpRegDeps(raw_ostream &OS, MCInstPrinter &MCIP) const;
+ void dumpMemDeps(raw_ostream &OS) const;
+ void dumpResDeps(raw_ostream &OS) const;
+
+ // Prints the register, memory and resource dependencies, in this order.
+ void dump(raw_ostream &OS, llvm::MCInstPrinter &MCIP) const {
+ dumpRegDeps(OS, MCIP);
+ dumpMemDeps(OS);
+ dumpResDeps(OS);
+ }
+#endif
+};
+
/// A view that collects and prints a few performance numbers.
class BottleneckAnalysis : public View {
- const llvm::MCSchedModel &SM;
+ const MCSubtargetInfo &STI;
+ MCInstPrinter &MCIP;
+ PressureTracker Tracker;
+ DependencyGraph DG;
+
+ ArrayRef<MCInst> Source;
+ unsigned Iterations;
unsigned TotalCycles;
+ bool PressureIncreasedBecauseOfResources;
+ bool PressureIncreasedBecauseOfRegisterDependencies;
+ bool PressureIncreasedBecauseOfMemoryDependencies;
+ // True if throughput was affected by dispatch stalls.
+ bool SeenStallCycles;
+
struct BackPressureInfo {
// Cycles where backpressure increased.
unsigned PressureIncreaseCycles;
};
BackPressureInfo BPI;
- // Resource pressure distribution. There is an element for every processor
- // resource declared by the scheduling model. Quantities are number of cycles.
- llvm::SmallVector<unsigned, 8> ResourcePressureDistribution;
-
- // Each processor resource is associated with a so-called processor resource
- // mask. This vector allows to correlate processor resource IDs with processor
- // resource masks. There is exactly one element per each processor resource
- // declared by the scheduling model.
- llvm::SmallVector<uint64_t, 8> ProcResourceMasks;
-
- // Used to map resource indices to actual processor resource IDs.
- llvm::SmallVector<unsigned, 8> ResIdx2ProcResID;
-
- // True if resource pressure events were notified during this cycle.
- bool PressureIncreasedBecauseOfResources;
- bool PressureIncreasedBecauseOfDataDependencies;
-
- // True if throughput was affected by dispatch stalls.
- bool SeenStallCycles;
-
// Prints a bottleneck message to OS.
- void printBottleneckHints(llvm::raw_ostream &OS) const;
+ void printBottleneckHints(raw_ostream &OS) const;
public:
- BottleneckAnalysis(const llvm::MCSchedModel &Model);
-
- void onCycleEnd() override {
- ++TotalCycles;
- if (PressureIncreasedBecauseOfResources ||
- PressureIncreasedBecauseOfDataDependencies) {
- ++BPI.PressureIncreaseCycles;
- if (PressureIncreasedBecauseOfDataDependencies)
- ++BPI.DataDependencyCycles;
- PressureIncreasedBecauseOfResources = false;
- PressureIncreasedBecauseOfDataDependencies = false;
- }
- }
+ BottleneckAnalysis(const MCSubtargetInfo &STI, MCInstPrinter &MCIP,
+ ArrayRef<MCInst> Sequence, unsigned Iterations);
+ void onCycleEnd() override;
void onEvent(const HWStallEvent &Event) override { SeenStallCycles = true; }
-
void onEvent(const HWPressureEvent &Event) override;
+ void onEvent(const HWInstructionEvent &Event) override;
void printView(llvm::raw_ostream &OS) const override;
};