From: Andrea Di Biagio Date: Wed, 17 Apr 2019 06:02:05 +0000 (+0000) Subject: [MCA] Moved the bottleneck analysis to its own file. NFCI X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=430cf96dccd9011e71eb191ede264b71fe671ae6;p=llvm [MCA] Moved the bottleneck analysis to its own file. NFCI git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358554 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/tools/llvm-mca/CMakeLists.txt b/tools/llvm-mca/CMakeLists.txt index 1fceb08c1ca..4b18075f986 100644 --- a/tools/llvm-mca/CMakeLists.txt +++ b/tools/llvm-mca/CMakeLists.txt @@ -17,6 +17,7 @@ add_llvm_tool(llvm-mca CodeRegion.cpp CodeRegionGenerator.cpp PipelinePrinter.cpp + Views/BottleneckAnalysis.cpp Views/DispatchStatistics.cpp Views/InstructionInfoView.cpp Views/RegisterFileStatistics.cpp diff --git a/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/tools/llvm-mca/Views/BottleneckAnalysis.cpp new file mode 100644 index 00000000000..182a97aed37 --- /dev/null +++ b/tools/llvm-mca/Views/BottleneckAnalysis.cpp @@ -0,0 +1,142 @@ +//===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the functionalities used by the BottleneckAnalysis +/// to report bottleneck info. 
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/BottleneckAnalysis.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MCA/Support.h"
+#include "llvm/Support/Format.h"
+
+namespace llvm {
+namespace mca {
+
+#define DEBUG_TYPE "llvm-mca"
+
+/// Initializes every counter to zero and builds the table that maps resource
+/// state indices back to the processor resource IDs declared by \p Model.
+BottleneckAnalysis::BottleneckAnalysis(const MCSchedModel &Model)
+    : SM(Model), TotalCycles(0), BPI({0, 0, 0, 0, 0}),
+      ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0),
+      ProcResourceMasks(Model.getNumProcResourceKinds()),
+      ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0),
+      PressureIncreasedBecauseOfResources(false),
+      PressureIncreasedBecauseOfDataDependencies(false),
+      SeenStallCycles(false) {
+  computeProcResourceMasks(SM, ProcResourceMasks);
+  // Resource ID 0 is not mapped: the loop only visits resource IDs >= 1.
+  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+    unsigned Index = getResourceStateIndex(ProcResourceMasks[I]);
+    ResIdx2ProcResID[Index] = I;
+  }
+}
+
+/// Records a backend pressure event. For RESOURCES events, every resource in
+/// the event mask gets one more pressure cycle; a resource with sub-units
+/// forwards that cycle to each of its sub-units instead.
+void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
+  assert(Event.Reason != HWPressureEvent::INVALID &&
+         "Unexpected invalid event!");
+
+  switch (Event.Reason) {
+  default:
+    break;
+
+  case HWPressureEvent::RESOURCES: {
+    PressureIncreasedBecauseOfResources = true;
+    ++BPI.ResourcePressureCycles;
+    uint64_t ResourceMask = Event.ResourceMask;
+    while (ResourceMask) {
+      // Isolate the lowest set bit of the mask and remove it from the mask.
+      uint64_t Current = ResourceMask & (-ResourceMask);
+      ResourceMask ^= Current;
+      unsigned Index = getResourceStateIndex(Current);
+      unsigned ProcResID = ResIdx2ProcResID[Index];
+      const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
+      if (!PRDesc.SubUnitsIdxBegin) {
+        ResourcePressureDistribution[Index]++;
+        continue;
+      }
+
+      for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) {
+        unsigned OtherProcResID = PRDesc.SubUnitsIdxBegin[I];
+        // Masks are handled as 64-bit values here; do not narrow to unsigned.
+        uint64_t OtherMask = ProcResourceMasks[OtherProcResID];
+        ResourcePressureDistribution[getResourceStateIndex(OtherMask)]++;
+      }
+    }
+    break;
+  }
+
+  case HWPressureEvent::REGISTER_DEPS:
+    PressureIncreasedBecauseOfDataDependencies = true;
+    ++BPI.RegisterDependencyCycles;
+    break;
+  case HWPressureEvent::MEMORY_DEPS:
+    PressureIncreasedBecauseOfDataDependencies = true;
+    ++BPI.MemoryDependencyCycles;
+    break;
+  }
+}
+
+/// Prints the share of simulated cycles affected by each kind of backend
+/// pressure, or a short message if no bottleneck was observed.
+void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
+  if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
+    OS << "\nNo resource or data dependency bottlenecks discovered.\n";
+    return;
+  }
+
+  // Percentage of TotalCycles, rounded to two decimals. TotalCycles is nonzero
+  // here: PressureIncreaseCycles is only incremented after TotalCycles.
+  auto Percentage = [this](unsigned Cycles) -> double {
+    double Value = static_cast<double>(Cycles) * 100 / TotalCycles;
+    return floor((Value * 100) + 0.5) / 100;
+  };
+
+  OS << "\nCycles with backend pressure increase [ "
+     << format("%.2f", Percentage(BPI.PressureIncreaseCycles)) << "% ]";
+
+  OS << "\nThroughput Bottlenecks: "
+     << "\n Resource Pressure [ "
+     << format("%.2f", Percentage(BPI.ResourcePressureCycles)) << "% ]";
+
+  for (unsigned I = 0, E = ResourcePressureDistribution.size(); I < E; ++I) {
+    if (!ResourcePressureDistribution[I])
+      continue;
+    unsigned ProcResID = ResIdx2ProcResID[getResourceStateIndex(1ULL << I)];
+    const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
+    OS << "\n - " << PRDesc.Name << " [ "
+       << format("%.2f", Percentage(ResourcePressureDistribution[I])) << "% ]";
+  }
+
+  OS << "\n Data Dependencies: [ "
+     << format("%.2f", Percentage(BPI.DataDependencyCycles)) << "% ]";
+
+  OS << "\n - Register Dependencies [ "
+     << format("%.2f", Percentage(BPI.RegisterDependencyCycles)) << "% ]";
+
+  OS << "\n - Memory Dependencies [ "
+     << format("%.2f", Percentage(BPI.MemoryDependencyCycles)) << "% ]\n\n";
+}
+
+/// Renders the bottleneck report into a temporary string buffer and then
+/// writes that buffer to \p OS.
+void BottleneckAnalysis::printView(raw_ostream &OS) const {
+  std::string Buffer;
+  raw_string_ostream TempStream(Buffer);
+  printBottleneckHints(TempStream);
+  TempStream.flush();
+  OS << Buffer;
+}
+} // namespace mca.
+} // namespace llvm
diff --git a/tools/llvm-mca/Views/BottleneckAnalysis.h b/tools/llvm-mca/Views/BottleneckAnalysis.h
new file mode 100644
index 00000000000..129ffb3d163
--- /dev/null
+++ b/tools/llvm-mca/Views/BottleneckAnalysis.h
@@ -0,0 +1,105 @@
+//===--------------------- BottleneckAnalysis.h -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the bottleneck analysis view.
+///
+/// This view internally observes backend pressure increase events in order to
+/// identify potential sources of bottlenecks.
+///
+/// Example of bottleneck analysis report:
+///
+/// Cycles with backend pressure increase [ 33.40% ]
+/// Throughput Bottlenecks:
+/// Resource Pressure [ 0.52% ]
+/// - JLAGU [ 0.52% ]
+/// Data Dependencies: [ 32.88% ]
+/// - Register Dependencies [ 32.88% ]
+/// - Memory Dependencies [ 0.00% ]
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_BOTTLENECK_ANALYSIS_H
+#define LLVM_TOOLS_LLVM_MCA_BOTTLENECK_ANALYSIS_H
+
+#include "Views/View.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace mca {
+
+/// A view that observes backend pressure events and reports bottlenecks.
+class BottleneckAnalysis : public View {
+  const llvm::MCSchedModel &SM;
+  unsigned TotalCycles;
+
+  struct BackPressureInfo {
+    // Cycles where backpressure increased.
+    unsigned PressureIncreaseCycles;
+    // Cycles where backpressure increased because of pipeline pressure.
+    unsigned ResourcePressureCycles;
+    // Cycles where backpressure increased because of data dependencies.
+    unsigned DataDependencyCycles;
+    // Cycles where backpressure increased because of register dependencies.
+    unsigned RegisterDependencyCycles;
+    // Cycles where backpressure increased because of memory dependencies.
+    unsigned MemoryDependencyCycles;
+  };
+  BackPressureInfo BPI;
+
+  // Resource pressure distribution. There is an element for every processor
+  // resource declared by the scheduling model. Quantities are number of cycles.
+  llvm::SmallVector<unsigned, 8> ResourcePressureDistribution;
+
+  // Each processor resource is associated with a so-called processor resource
+  // mask. This vector maps processor resource IDs to processor resource masks;
+  // there is exactly one element for each resource in the scheduling model.
+  llvm::SmallVector<uint64_t, 8> ProcResourceMasks; // NOTE(review): inline size 8 assumed.
+
+  // Used to map resource indices to actual processor resource IDs.
+  llvm::SmallVector<unsigned, 8> ResIdx2ProcResID;
+
+  // True if a resource pressure event was notified during this cycle.
+  bool PressureIncreasedBecauseOfResources;
+  // True if a data dependency pressure event was notified during this cycle.
+  bool PressureIncreasedBecauseOfDataDependencies;
+
+  // True if throughput was affected by dispatch stalls.
+  bool SeenStallCycles;
+
+  // Prints a bottleneck message to OS.
+  void printBottleneckHints(llvm::raw_ostream &OS) const;
+
+public:
+  BottleneckAnalysis(const llvm::MCSchedModel &Model);
+
+  // Closes the current cycle: updates the counters and clears per-cycle flags.
+  void onCycleEnd() override {
+    ++TotalCycles;
+    if (PressureIncreasedBecauseOfResources ||
+        PressureIncreasedBecauseOfDataDependencies) {
+      ++BPI.PressureIncreaseCycles;
+      if (PressureIncreasedBecauseOfDataDependencies)
+        ++BPI.DataDependencyCycles;
+      PressureIncreasedBecauseOfResources = false;
+      PressureIncreasedBecauseOfDataDependencies = false;
+    }
+  }
+
+  void onEvent(const HWStallEvent &Event) override { SeenStallCycles = true; }
+  void onEvent(const HWPressureEvent &Event) override;
+
+  void printView(llvm::raw_ostream &OS) const override;
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif
diff --git a/tools/llvm-mca/Views/SummaryView.cpp b/tools/llvm-mca/Views/SummaryView.cpp index 5aa53b2df90..ef5550048f4 100644 --- a/tools/llvm-mca/Views/SummaryView.cpp +++ b/tools/llvm-mca/Views/SummaryView.cpp @@ -23,18 +23,13 @@ namespace mca { #define DEBUG_TYPE "llvm-mca" SummaryView::SummaryView(const MCSchedModel &Model, ArrayRef S, - unsigned Width, bool EmitBottleneckAnalysis) + unsigned Width) : SM(Model), Source(S), DispatchWidth(Width?Width: Model.IssueWidth), LastInstructionIdx(0), - TotalCycles(0), NumMicroOps(0), BPI({0, 0, 0, 0, 0}), - ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0), + TotalCycles(0), NumMicroOps(0), ProcResourceUsage(Model.getNumProcResourceKinds(), 0), ProcResourceMasks(Model.getNumProcResourceKinds()), - ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0), - PressureIncreasedBecauseOfResources(false), - PressureIncreasedBecauseOfDataDependencies(false), - SeenStallCycles(false), - ShouldEmitBottleneckAnalysis(EmitBottleneckAnalysis) { + ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0) { computeProcResourceMasks(SM, ProcResourceMasks); for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { unsigned Index = getResourceStateIndex(ProcResourceMasks[I]); @@ -67,100 +62,6
@@ void SummaryView::onEvent(const HWInstructionEvent &Event) { } } -void SummaryView::onEvent(const HWPressureEvent &Event) { - assert(Event.Reason != HWPressureEvent::INVALID && - "Unexpected invalid event!"); - - switch (Event.Reason) { - default: - break; - - case HWPressureEvent::RESOURCES: { - PressureIncreasedBecauseOfResources = true; - ++BPI.ResourcePressureCycles; - uint64_t ResourceMask = Event.ResourceMask; - while (ResourceMask) { - uint64_t Current = ResourceMask & (-ResourceMask); - unsigned Index = getResourceStateIndex(Current); - unsigned ProcResID = ResIdx2ProcResID[Index]; - const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID); - if (!PRDesc.SubUnitsIdxBegin) { - ResourcePressureDistribution[Index]++; - ResourceMask ^= Current; - continue; - } - - for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) { - unsigned OtherProcResID = PRDesc.SubUnitsIdxBegin[I]; - unsigned OtherMask = ProcResourceMasks[OtherProcResID]; - ResourcePressureDistribution[getResourceStateIndex(OtherMask)]++; - } - - ResourceMask ^= Current; - } - } - - break; - case HWPressureEvent::REGISTER_DEPS: - PressureIncreasedBecauseOfDataDependencies = true; - ++BPI.RegisterDependencyCycles; - break; - case HWPressureEvent::MEMORY_DEPS: - PressureIncreasedBecauseOfDataDependencies = true; - ++BPI.MemoryDependencyCycles; - break; - } -} - -void SummaryView::printBottleneckHints(raw_ostream &OS) const { - if (!SeenStallCycles || !BPI.PressureIncreaseCycles) { - OS << "\nNo resource or data dependency bottlenecks discovered.\n"; - return; - } - - double PressurePerCycle = - (double)BPI.PressureIncreaseCycles * 100 / TotalCycles; - double ResourcePressurePerCycle = - (double)BPI.ResourcePressureCycles * 100 / TotalCycles; - double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles; - double RegDepPressurePerCycle = - (double)BPI.RegisterDependencyCycles * 100 / TotalCycles; - double MemDepPressurePerCycle = - (double)BPI.MemoryDependencyCycles * 100 / 
TotalCycles; - - OS << "\nCycles with backend pressure increase [ " - << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]"; - - OS << "\nThroughput Bottlenecks: " - << "\n Resource Pressure [ " - << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100) - << "% ]"; - - if (BPI.PressureIncreaseCycles) { - for (unsigned I = 0, E = ResourcePressureDistribution.size(); I < E; ++I) { - if (ResourcePressureDistribution[I]) { - double Frequency = - (double)ResourcePressureDistribution[I] * 100 / TotalCycles; - unsigned Index = ResIdx2ProcResID[getResourceStateIndex(1ULL << I)]; - const MCProcResourceDesc &PRDesc = *SM.getProcResource(Index); - OS << "\n - " << PRDesc.Name << " [ " - << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]"; - } - } - } - - OS << "\n Data Dependencies: [ " - << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]"; - - OS << "\n - Register Dependencies [ " - << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100) - << "% ]"; - - OS << "\n - Memory Dependencies [ " - << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100) - << "% ]\n\n"; -} - void SummaryView::printView(raw_ostream &OS) const { unsigned Instructions = Source.size(); unsigned Iterations = (LastInstructionIdx / Instructions) + 1; @@ -185,10 +86,9 @@ void SummaryView::printView(raw_ostream &OS) const { TempStream << "\nBlock RThroughput: " << format("%.1f", floor((BlockRThroughput * 10) + 0.5) / 10) << '\n'; - if (ShouldEmitBottleneckAnalysis) - printBottleneckHints(TempStream); TempStream.flush(); OS << Buffer; } + } // namespace mca. } // namespace llvm diff --git a/tools/llvm-mca/Views/SummaryView.h b/tools/llvm-mca/Views/SummaryView.h index ff3f6921500..9be31b7d51b 100644 --- a/tools/llvm-mca/Views/SummaryView.h +++ b/tools/llvm-mca/Views/SummaryView.h @@ -46,24 +46,6 @@ class SummaryView : public View { // The total number of micro opcodes contributed by a block of instructions. 
unsigned NumMicroOps; - struct BackPressureInfo { - // Cycles where backpressure increased. - unsigned PressureIncreaseCycles; - // Cycles where backpressure increased because of pipeline pressure. - unsigned ResourcePressureCycles; - // Cycles where backpressure increased because of data dependencies. - unsigned DataDependencyCycles; - // Cycles where backpressure increased because of register dependencies. - unsigned RegisterDependencyCycles; - // Cycles where backpressure increased because of memory dependencies. - unsigned MemoryDependencyCycles; - }; - BackPressureInfo BPI; - - // Resource pressure distribution. There is an element for every processor - // resource declared by the scheduling model. Quantities are number of cycles. - llvm::SmallVector ResourcePressureDistribution; - // For each processor resource, this vector stores the cumulative number of // resource cycles consumed by the analyzed code block. llvm::SmallVector ProcResourceUsage; @@ -77,49 +59,21 @@ class SummaryView : public View { // Used to map resource indices to actual processor resource IDs. llvm::SmallVector ResIdx2ProcResID; - // True if resource pressure events were notified during this cycle. - bool PressureIncreasedBecauseOfResources; - bool PressureIncreasedBecauseOfDataDependencies; - - // True if throughput was affected by dispatch stalls. - bool SeenStallCycles; - - // True if the bottleneck analysis should be displayed. - bool ShouldEmitBottleneckAnalysis; - // Compute the reciprocal throughput for the analyzed code block. // The reciprocal block throughput is computed as the MAX between: // - NumMicroOps / DispatchWidth // - Total Resource Cycles / #Units (for every resource consumed). double getBlockRThroughput() const; - // Prints a bottleneck message to OS. 
- void printBottleneckHints(llvm::raw_ostream &OS) const; - public: SummaryView(const llvm::MCSchedModel &Model, llvm::ArrayRef S, - unsigned Width, bool EmitBottleneckAnalysis); + unsigned Width); - void onCycleEnd() override { - ++TotalCycles; - if (PressureIncreasedBecauseOfResources || - PressureIncreasedBecauseOfDataDependencies) { - ++BPI.PressureIncreaseCycles; - if (PressureIncreasedBecauseOfDataDependencies) - ++BPI.DataDependencyCycles; - PressureIncreasedBecauseOfResources = false; - PressureIncreasedBecauseOfDataDependencies = false; - } - } + void onCycleEnd() override { ++TotalCycles; } void onEvent(const HWInstructionEvent &Event) override; - void onEvent(const HWStallEvent &Event) override { - SeenStallCycles = true; - } - - void onEvent(const HWPressureEvent &Event) override; - void printView(llvm::raw_ostream &OS) const override; }; + } // namespace mca } // namespace llvm diff --git a/tools/llvm-mca/llvm-mca.cpp b/tools/llvm-mca/llvm-mca.cpp index c39b49bebe1..861b6a5809a 100644 --- a/tools/llvm-mca/llvm-mca.cpp +++ b/tools/llvm-mca/llvm-mca.cpp @@ -23,6 +23,7 @@ #include "CodeRegion.h" #include "CodeRegionGenerator.h" #include "PipelinePrinter.h" +#include "Views/BottleneckAnalysis.h" #include "Views/DispatchStatistics.h" #include "Views/InstructionInfoView.h" #include "Views/RegisterFileStatistics.h" @@ -477,7 +478,10 @@ int main(int argc, char **argv) { if (PrintSummaryView) Printer.addView(llvm::make_unique( - SM, Insts, DispatchWidth, EnableBottleneckAnalysis)); + SM, Insts, DispatchWidth)); + + if (EnableBottleneckAnalysis) + Printer.addView(llvm::make_unique(SM)); if (PrintInstructionInfoView) Printer.addView(