From: Andrea Di Biagio Date: Mon, 8 Apr 2019 16:05:54 +0000 (+0000) Subject: [llvm-mca][scheduler-stats] Print issued micro opcodes per cycle. NFCI X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=eb44479056803c5b4c5242a0876e24a3ef676af4;p=llvm [llvm-mca][scheduler-stats] Print issued micro opcodes per cycle. NFCI It makes more sense to print out the number of micro opcodes that are issued every cycle rather than the number of instructions issued per cycle. This behavior is also consistent with the dispatch-stats: numbers from the two views can now be easily compared. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@357919 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/docs/CommandGuide/llvm-mca.rst b/docs/CommandGuide/llvm-mca.rst index a0eeb08bc9b..5d504a9f9cb 100644 --- a/docs/CommandGuide/llvm-mca.rst +++ b/docs/CommandGuide/llvm-mca.rst @@ -498,7 +498,7 @@ sections. 2, 314 (51.5%) - Schedulers - number of cycles where we saw N instructions issued: + Schedulers - number of cycles where we saw N micro opcodes issued: [# issued], [# cycles] 0, 7 (1.1%) 1, 306 (50.2%) @@ -552,9 +552,9 @@ dispatch statistics are displayed by either using the command option ``-all-stats`` or ``-dispatch-stats``. The next table, *Schedulers*, presents a histogram displaying a count, -representing the number of instructions issued on some number of cycles. In -this case, of the 610 simulated cycles, single instructions were issued 306 -times (50.2%) and there were 7 cycles where no instructions were issued. +representing the number of micro opcodes issued on some number of cycles. In +this case, of the 610 simulated cycles, single opcodes were issued 306 times +(50.2%) and there were 7 cycles where no opcodes were issued. The *Scheduler's queue usage* table shows that the average and maximum number of buffer entries (i.e., scheduler queue entries) used at runtime. 
Resource JFPU01 diff --git a/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s b/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s index 8a2c7804ef3..7249998a780 100644 --- a/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s +++ b/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s @@ -25,7 +25,7 @@ # M4-NEXT: IPC: 0.50 # M4-NEXT: Block RThroughput: 0.2 -# ALL: Schedulers - number of cycles where we saw N instructions issued: +# ALL: Schedulers - number of cycles where we saw N micro opcodes issued: # ALL-NEXT: [# issued], [# cycles] # ALL-NEXT: 0, 1 (50.0%) # ALL-NEXT: 1, 1 (50.0%) diff --git a/test/tools/llvm-mca/X86/BdVer2/load-throughput.s b/test/tools/llvm-mca/X86/BdVer2/load-throughput.s index 9c45ce63fa4..5ab47b28a18 100644 --- a/test/tools/llvm-mca/X86/BdVer2/load-throughput.s +++ b/test/tools/llvm-mca/X86/BdVer2/load-throughput.s @@ -90,7 +90,7 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: 2, 172 (83.1%) # CHECK-NEXT: 4, 14 (6.8%) -# CHECK: Schedulers - number of cycles where we saw N instructions issued: +# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] # CHECK-NEXT: 0, 7 (3.4%) # CHECK-NEXT: 2, 200 (96.6%) @@ -203,7 +203,7 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: 2, 172 (83.1%) # CHECK-NEXT: 4, 14 (6.8%) -# CHECK: Schedulers - number of cycles where we saw N instructions issued: +# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] # CHECK-NEXT: 0, 7 (3.4%) # CHECK-NEXT: 2, 200 (96.6%) @@ -316,7 +316,7 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: 2, 172 (83.1%) # CHECK-NEXT: 4, 14 (6.8%) -# CHECK: Schedulers - number of cycles where we saw N instructions issued: +# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] # CHECK-NEXT: 0, 7 (3.4%) # CHECK-NEXT: 2, 200 (96.6%) @@ -429,7 +429,7 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: 2, 172 (83.1%) # CHECK-NEXT: 4, 
14 (6.8%) -# CHECK: Schedulers - number of cycles where we saw N instructions issued: +# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] # CHECK-NEXT: 0, 7 (3.4%) # CHECK-NEXT: 2, 200 (96.6%) @@ -542,7 +542,7 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: 2, 172 (83.1%) # CHECK-NEXT: 4, 14 (6.8%) -# CHECK: Schedulers - number of cycles where we saw N instructions issued: +# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] # CHECK-NEXT: 0, 7 (3.4%) # CHECK-NEXT: 2, 200 (96.6%) @@ -655,7 +655,7 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: 2, 172 (83.1%) # CHECK-NEXT: 4, 14 (6.8%) -# CHECK: Schedulers - number of cycles where we saw N instructions issued: +# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] # CHECK-NEXT: 0, 7 (3.4%) # CHECK-NEXT: 2, 200 (96.6%) @@ -767,10 +767,10 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: 0, 7 (3.4%) # CHECK-NEXT: 4, 200 (96.6%) -# CHECK: Schedulers - number of cycles where we saw N instructions issued: +# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] # CHECK-NEXT: 0, 7 (3.4%) -# CHECK-NEXT: 2, 200 (96.6%) +# CHECK-NEXT: 4, 200 (96.6%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. 
diff --git a/test/tools/llvm-mca/X86/BdVer2/scheduler-queue-usage.s b/test/tools/llvm-mca/X86/BdVer2/scheduler-queue-usage.s index db7a373a23e..dc3804efca9 100644 --- a/test/tools/llvm-mca/X86/BdVer2/scheduler-queue-usage.s +++ b/test/tools/llvm-mca/X86/BdVer2/scheduler-queue-usage.s @@ -26,7 +26,7 @@ add %rsi, %rsi # CHECK-NEXT: 1 10 1.00 * vmulps (%rsi), %xmm0, %xmm0 # CHECK-NEXT: 1 1 0.50 addq %rsi, %rsi -# CHECK: Schedulers - number of cycles where we saw N instructions issued: +# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] # CHECK-NEXT: 0, 12 (92.3%) # CHECK-NEXT: 2, 1 (7.7%) diff --git a/test/tools/llvm-mca/X86/BdVer2/store-throughput.s b/test/tools/llvm-mca/X86/BdVer2/store-throughput.s index 67f13c3ccdd..987952aa3eb 100644 --- a/test/tools/llvm-mca/X86/BdVer2/store-throughput.s +++ b/test/tools/llvm-mca/X86/BdVer2/store-throughput.s @@ -91,7 +91,7 @@ vmovaps %ymm3, (%rbx) # CHECK-NEXT: 2, 1 (0.2%) # CHECK-NEXT: 4, 7 (1.7%) -# CHECK: Schedulers - number of cycles where we saw N instructions issued: +# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] # CHECK-NEXT: 0, 3 (0.7%) # CHECK-NEXT: 1, 400 (99.3%) @@ -205,7 +205,7 @@ vmovaps %ymm3, (%rbx) # CHECK-NEXT: 2, 1 (0.2%) # CHECK-NEXT: 4, 7 (1.7%) -# CHECK: Schedulers - number of cycles where we saw N instructions issued: +# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] # CHECK-NEXT: 0, 3 (0.7%) # CHECK-NEXT: 1, 400 (99.3%) @@ -319,7 +319,7 @@ vmovaps %ymm3, (%rbx) # CHECK-NEXT: 2, 1 (0.2%) # CHECK-NEXT: 4, 7 (1.7%) -# CHECK: Schedulers - number of cycles where we saw N instructions issued: +# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] # CHECK-NEXT: 0, 3 (0.7%) # CHECK-NEXT: 1, 400 (99.3%) @@ -433,7 +433,7 @@ vmovaps %ymm3, (%rbx) # CHECK-NEXT: 2, 1 
(0.2%) # CHECK-NEXT: 4, 7 (1.7%) -# CHECK: Schedulers - number of cycles where we saw N instructions issued: +# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] # CHECK-NEXT: 0, 3 (0.7%) # CHECK-NEXT: 1, 400 (99.3%) @@ -547,7 +547,7 @@ vmovaps %ymm3, (%rbx) # CHECK-NEXT: 2, 1 (0.1%) # CHECK-NEXT: 4, 6 (0.7%) -# CHECK: Schedulers - number of cycles where we saw N instructions issued: +# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] # CHECK-NEXT: 0, 403 (50.2%) # CHECK-NEXT: 1, 400 (49.8%) @@ -662,7 +662,7 @@ vmovaps %ymm3, (%rbx) # CHECK-NEXT: 2, 1 (0.2%) # CHECK-NEXT: 4, 7 (1.7%) -# CHECK: Schedulers - number of cycles where we saw N instructions issued: +# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] # CHECK-NEXT: 0, 3 (0.7%) # CHECK-NEXT: 1, 400 (99.3%) @@ -774,10 +774,10 @@ vmovaps %ymm3, (%rbx) # CHECK-NEXT: 0, 3 (0.7%) # CHECK-NEXT: 4, 400 (99.3%) -# CHECK: Schedulers - number of cycles where we saw N instructions issued: +# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] # CHECK-NEXT: 0, 3 (0.7%) -# CHECK-NEXT: 1, 400 (99.3%) +# CHECK-NEXT: 4, 400 (99.3%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. 
diff --git a/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s b/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s index 58c4b4476f5..6670144777f 100644 --- a/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s +++ b/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s @@ -26,7 +26,7 @@ add %rsi, %rsi # CHECK-NEXT: 1 7 1.00 * vmulps (%rsi), %xmm0, %xmm0 # CHECK-NEXT: 1 1 0.50 addq %rsi, %rsi -# CHECK: Schedulers - number of cycles where we saw N instructions issued: +# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] # CHECK-NEXT: 0, 9 (90.0%) # CHECK-NEXT: 2, 1 (10.0%) diff --git a/test/tools/llvm-mca/X86/option-all-stats-1.s b/test/tools/llvm-mca/X86/option-all-stats-1.s index d2265583c1d..74c09f6c360 100644 --- a/test/tools/llvm-mca/X86/option-all-stats-1.s +++ b/test/tools/llvm-mca/X86/option-all-stats-1.s @@ -41,7 +41,7 @@ add %eax, %eax # FULLREPORT-NEXT: 1, 62 (60.2%) # FULLREPORT-NEXT: 2, 19 (18.4%) -# FULLREPORT: Schedulers - number of cycles where we saw N instructions issued: +# FULLREPORT: Schedulers - number of cycles where we saw N micro opcodes issued: # FULLREPORT-NEXT: [# issued], [# cycles] # FULLREPORT-NEXT: 0, 3 (2.9%) # FULLREPORT-NEXT: 1, 100 (97.1%) diff --git a/test/tools/llvm-mca/X86/option-all-stats-2.s b/test/tools/llvm-mca/X86/option-all-stats-2.s index e752d82bd0f..c0a953621d2 100644 --- a/test/tools/llvm-mca/X86/option-all-stats-2.s +++ b/test/tools/llvm-mca/X86/option-all-stats-2.s @@ -42,7 +42,7 @@ add %eax, %eax # FULL-NEXT: 1, 62 (60.2%) # FULL-NEXT: 2, 19 (18.4%) -# ALL: Schedulers - number of cycles where we saw N instructions issued: +# ALL: Schedulers - number of cycles where we saw N micro opcodes issued: # ALL-NEXT: [# issued], [# cycles] # ALL-NEXT: 0, 3 (2.9%) # ALL-NEXT: 1, 100 (97.1%) diff --git a/test/tools/llvm-mca/X86/option-all-views-1.s b/test/tools/llvm-mca/X86/option-all-views-1.s index 298a54b9ed4..79f9f123902 100644 --- 
a/test/tools/llvm-mca/X86/option-all-views-1.s +++ b/test/tools/llvm-mca/X86/option-all-views-1.s @@ -43,7 +43,7 @@ add %eax, %eax # FULLREPORT-NEXT: 1, 62 (60.2%) # FULLREPORT-NEXT: 2, 19 (18.4%) -# FULLREPORT: Schedulers - number of cycles where we saw N instructions issued: +# FULLREPORT: Schedulers - number of cycles where we saw N micro opcodes issued: # FULLREPORT-NEXT: [# issued], [# cycles] # FULLREPORT-NEXT: 0, 3 (2.9%) # FULLREPORT-NEXT: 1, 100 (97.1%) diff --git a/test/tools/llvm-mca/X86/option-all-views-2.s b/test/tools/llvm-mca/X86/option-all-views-2.s index 0afd21fc263..00bc1765aa6 100644 --- a/test/tools/llvm-mca/X86/option-all-views-2.s +++ b/test/tools/llvm-mca/X86/option-all-views-2.s @@ -42,7 +42,7 @@ add %eax, %eax # ALL-NEXT: 1, 62 (60.2%) # ALL-NEXT: 2, 19 (18.4%) -# ALL: Schedulers - number of cycles where we saw N instructions issued: +# ALL: Schedulers - number of cycles where we saw N micro opcodes issued: # ALL-NEXT: [# issued], [# cycles] # ALL-NEXT: 0, 3 (2.9%) # ALL-NEXT: 1, 100 (97.1%) diff --git a/test/tools/llvm-mca/X86/scheduler-queue-usage.s b/test/tools/llvm-mca/X86/scheduler-queue-usage.s index a1854a28219..733745a093d 100644 --- a/test/tools/llvm-mca/X86/scheduler-queue-usage.s +++ b/test/tools/llvm-mca/X86/scheduler-queue-usage.s @@ -13,7 +13,7 @@ xor %eax, %ebx -# ALL: Schedulers - number of cycles where we saw N instructions issued: +# ALL: Schedulers - number of cycles where we saw N micro opcodes issued: # ALL-NEXT: [# issued], [# cycles] # ALL-NEXT: 0, 3 (75.0%) # ALL-NEXT: 1, 1 (25.0%) diff --git a/tools/llvm-mca/Views/SchedulerStatistics.cpp b/tools/llvm-mca/Views/SchedulerStatistics.cpp index a6143494d48..bd0ba350ab6 100644 --- a/tools/llvm-mca/Views/SchedulerStatistics.cpp +++ b/tools/llvm-mca/Views/SchedulerStatistics.cpp @@ -22,7 +22,6 @@ SchedulerStatistics::SchedulerStatistics(const llvm::MCSubtargetInfo &STI) : SM(STI.getSchedModel()), LQResourceID(0), SQResourceID(0), NumIssued(0), NumCycles(0), 
MostRecentLoadDispatched(~0U), MostRecentStoreDispatched(~0U), - IssuedPerCycle(STI.getSchedModel().NumProcResourceKinds, 0), Usage(STI.getSchedModel().NumProcResourceKinds, {0, 0, 0}) { if (SM.hasExtraProcessorInfo()) { const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); @@ -43,9 +42,10 @@ SchedulerStatistics::SchedulerStatistics(const llvm::MCSubtargetInfo &STI) // In future we should add a new "memory queue" event type, so that we stop // making assumptions on how LSUnit internally works (See PR39828). void SchedulerStatistics::onEvent(const HWInstructionEvent &Event) { - if (Event.Type == HWInstructionEvent::Issued) - ++NumIssued; - else if (Event.Type == HWInstructionEvent::Dispatched) { + if (Event.Type == HWInstructionEvent::Issued) { + const Instruction &Inst = *Event.IR.getInstruction(); + NumIssued += Inst.getDesc().NumMicroOps; + } else if (Event.Type == HWInstructionEvent::Dispatched) { const Instruction &Inst = *Event.IR.getInstruction(); const unsigned Index = Event.IR.getSourceIndex(); if (LQResourceID && Inst.getDesc().MayLoad && @@ -95,29 +95,25 @@ void SchedulerStatistics::updateHistograms() { BU.MaxUsedSlots = std::max(BU.MaxUsedSlots, BU.SlotsInUse); } - IssuedPerCycle[NumIssued]++; + IssueWidthPerCycle[NumIssued]++; NumIssued = 0; } void SchedulerStatistics::printSchedulerStats(raw_ostream &OS) const { OS << "\n\nSchedulers - " - << "number of cycles where we saw N instructions issued:\n"; + << "number of cycles where we saw N micro opcodes issued:\n"; OS << "[# issued], [# cycles]\n"; - const auto It = - std::max_element(IssuedPerCycle.begin(), IssuedPerCycle.end()); - unsigned Index = std::distance(IssuedPerCycle.begin(), It); - bool HasColors = OS.has_colors(); - for (unsigned I = 0, E = IssuedPerCycle.size(); I < E; ++I) { - unsigned IPC = IssuedPerCycle[I]; - if (!IPC) - continue; - - if (I == Index && HasColors) + const auto It = + std::max_element(IssueWidthPerCycle.begin(), IssueWidthPerCycle.end()); + for (const std::pair 
&Entry : IssueWidthPerCycle) { + unsigned NumIssued = Entry.first; + if (NumIssued == It->first && HasColors) OS.changeColor(raw_ostream::SAVEDCOLOR, true, false); - OS << " " << I << ", " << IPC << " (" + unsigned IPC = Entry.second; + OS << " " << NumIssued << ", " << IPC << " (" << format("%.1f", ((double)IPC / NumCycles) * 100) << "%)\n"; if (HasColors) OS.resetColor(); diff --git a/tools/llvm-mca/Views/SchedulerStatistics.h b/tools/llvm-mca/Views/SchedulerStatistics.h index 8e4127c131d..32711b4483b 100644 --- a/tools/llvm-mca/Views/SchedulerStatistics.h +++ b/tools/llvm-mca/Views/SchedulerStatistics.h @@ -62,7 +62,9 @@ class SchedulerStatistics final : public View { uint64_t CumulativeNumUsedSlots; }; - std::vector<unsigned> IssuedPerCycle; + using Histogram = std::map<unsigned, unsigned>; + Histogram IssueWidthPerCycle; + std::vector<BufferUsage> Usage; void updateHistograms();