From: Andrea Di Biagio Date: Thu, 10 Jan 2019 13:59:13 +0000 (+0000) Subject: [MCA] Fix wrong definition of ResourceUnitMask in DefaultResourceStrategy. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=437f3bdb2355b0f453a4b4824e4b74ed6b6a73a5;p=llvm [MCA] Fix wrong definition of ResourceUnitMask in DefaultResourceStrategy. Field ResourceUnitMask was incorrectly defined as a 'const unsigned' mask. It should have been a 64 bit quantity instead. That means, ResourceUnitMask was always implicitly truncated to a 32 bit quantity. This issue has been found by inspection. Surprisingly, that bug was latent, and it never negatively affected any existing upstream targets. This patch fixes the wrong definition of ResourceUnitMask, and adds a bunch of extra debug prints to help debugging potential issues related to invalid processor resource masks. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350820 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/MCA/HardwareUnits/ResourceManager.h b/include/llvm/MCA/HardwareUnits/ResourceManager.h index 9114f7d9bae..549a46c247f 100644 --- a/include/llvm/MCA/HardwareUnits/ResourceManager.h +++ b/include/llvm/MCA/HardwareUnits/ResourceManager.h @@ -72,7 +72,7 @@ class DefaultResourceStrategy final : public ResourceStrategy { /// /// There is one bit set for every available resource unit. /// It defaults to the value of field ResourceSizeMask in ResourceState. - const unsigned ResourceUnitMask; + const uint64_t ResourceUnitMask; /// A simple round-robin selector for processor resource units. /// Each bit of this mask identifies a sub resource within a group. @@ -335,13 +335,13 @@ class ResourceManager { // Used to quickly identify groups that own a particular resource unit. std::vector Resource2Groups; + // A table to map processor resource IDs to processor resource masks. + SmallVector ProcResID2Mask; + // Keeps track of which resources are busy, and how many cycles are left // before those become usable again. SmallDenseMap BusyResources; - // A table to map processor resource IDs to processor resource masks. - SmallVector ProcResID2Mask; - // Returns the actual resource unit that will be used. ResourceRef selectPipe(uint64_t ResourceID); diff --git a/include/llvm/MCA/Instruction.h b/include/llvm/MCA/Instruction.h index 38c7ff4e6b3..b91610c64d8 100644 --- a/include/llvm/MCA/Instruction.h +++ b/include/llvm/MCA/Instruction.h @@ -138,8 +138,8 @@ class WriteState { public: WriteState(const WriteDescriptor &Desc, unsigned RegID, bool clearsSuperRegs = false, bool writesZero = false) - : WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID), - PRFID(0), ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero), + : WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID), PRFID(0), + ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero), IsEliminated(false), DependentWrite(nullptr), PartialWrite(nullptr), DependentWriteCyclesLeft(0) {} @@ -155,7 +155,9 @@ public: void addUser(ReadState *Use, int ReadAdvance); void addUser(WriteState *Use); - unsigned getDependentWriteCyclesLeft() const { return DependentWriteCyclesLeft; } + unsigned getDependentWriteCyclesLeft() const { + return DependentWriteCyclesLeft; + } unsigned getNumUsers() const { unsigned NumUsers = Users.size(); @@ -348,11 +350,6 @@ struct InstrDesc { InstrDesc() = default; InstrDesc(const InstrDesc &Other) = delete; InstrDesc &operator=(const InstrDesc &Other) = delete; - -#ifndef NDEBUG - // Original instruction name for debugging purposes. - StringRef Name; -#endif }; /// Base class for instructions consumed by the simulation pipeline. @@ -551,4 +548,4 @@ public: } // namespace mca } // namespace llvm -#endif // LLVM_MCA_INSTRUCTION_H +#endif // LLVM_MCA_INSTRUCTION_H diff --git a/include/llvm/MCA/Stages/ExecuteStage.h b/include/llvm/MCA/Stages/ExecuteStage.h index 4f40c5c65d6..8cb287e06d9 100644 --- a/include/llvm/MCA/Stages/ExecuteStage.h +++ b/include/llvm/MCA/Stages/ExecuteStage.h @@ -65,7 +65,7 @@ public: void notifyInstructionIssued( const InstRef &IR, - ArrayRef> Used) const; + MutableArrayRef> Used) const; void notifyInstructionExecuted(const InstRef &IR) const; void notifyInstructionReady(const InstRef &IR) const; void notifyResourceAvailable(const ResourceRef &RR) const; diff --git a/include/llvm/MCA/Stages/InstructionTables.h b/include/llvm/MCA/Stages/InstructionTables.h index 50f84fc6fff..34e338f0ce6 100644 --- a/include/llvm/MCA/Stages/InstructionTables.h +++ b/include/llvm/MCA/Stages/InstructionTables.h @@ -32,7 +32,8 @@ class InstructionTables final : public Stage { SmallVector Masks; public: - InstructionTables(const MCSchedModel &Model) : Stage(), SM(Model) { + InstructionTables(const MCSchedModel &Model) + : Stage(), SM(Model), Masks(Model.getNumProcResourceKinds()) { computeProcResourceMasks(Model, Masks); } diff --git a/include/llvm/MCA/Support.h b/include/llvm/MCA/Support.h index 5c5a3545433..7b0c5bf3a48 100644 --- a/include/llvm/MCA/Support.h +++ b/include/llvm/MCA/Support.h @@ -104,7 +104,7 @@ public: /// Resource masks are used by the ResourceManager to solve set membership /// problems with simple bit manipulation operations. void computeProcResourceMasks(const MCSchedModel &SM, - SmallVectorImpl &Masks); + MutableArrayRef Masks); /// Compute the reciprocal block throughput from a set of processor resource /// cycles. The reciprocal block throughput is computed as the MAX between: diff --git a/lib/MCA/HardwareUnits/ResourceManager.cpp b/lib/MCA/HardwareUnits/ResourceManager.cpp index d05ec0045de..2039b58e8ee 100644 --- a/lib/MCA/HardwareUnits/ResourceManager.cpp +++ b/lib/MCA/HardwareUnits/ResourceManager.cpp @@ -118,7 +118,8 @@ getStrategyFor(const ResourceState &RS) { ResourceManager::ResourceManager(const MCSchedModel &SM) : Resources(SM.getNumProcResourceKinds()), Strategies(SM.getNumProcResourceKinds()), - Resource2Groups(SM.getNumProcResourceKinds(), 0) { + Resource2Groups(SM.getNumProcResourceKinds(), 0), + ProcResID2Mask(SM.getNumProcResourceKinds()) { computeProcResourceMasks(SM, ProcResID2Mask); for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { @@ -283,9 +284,6 @@ void ResourceManager::issueInstruction( ResourceRef Pipe = selectPipe(R.first); use(Pipe); BusyResources[Pipe] += CS.size(); - // Replace the resource mask with a valid processor resource index. - const ResourceState &RS = *Resources[getResourceStateIndex(Pipe.first)]; - Pipe.first = RS.getProcResourceID(); Pipes.emplace_back(std::pair( Pipe, ResourceCycles(CS.size()))); } else { diff --git a/lib/MCA/InstrBuilder.cpp b/lib/MCA/InstrBuilder.cpp index 2cfe154a902..d2d65e55537 100644 --- a/lib/MCA/InstrBuilder.cpp +++ b/lib/MCA/InstrBuilder.cpp @@ -31,6 +31,8 @@ InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti, const llvm::MCInstrAnalysis *mcia) : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true), FirstReturnInst(true) { + const MCSchedModel &SM = STI.getSchedModel(); + ProcResourceMasks.resize(SM.getNumProcResourceKinds()); computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks); } @@ -178,8 +180,8 @@ static void initializeUsedResources(InstrDesc &ID, LLVM_DEBUG({ for (const std::pair &R : ID.Resources) - dbgs() << "\t\tMask=" << format_hex(R.first, 16) << ", " << - "cy=" << R.second.size() << '\n'; + dbgs() << "\t\tMask=" << format_hex(R.first, 16) << ", " + << "cy=" << R.second.size() << '\n'; for (const uint64_t R : ID.Buffers) dbgs() << "\t\tBuffer Mask=" << format_hex(R, 16) << '\n'; }); @@ -525,6 +527,9 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI) { MCI); } + LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n'); + LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n'); + // Create a new empty descriptor. std::unique_ptr ID = llvm::make_unique(); ID->NumMicroOps = SCDesc.NumMicroOps; @@ -559,9 +564,6 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI) { populateWrites(*ID, MCI, SchedClassID); populateReads(*ID, MCI, SchedClassID); -#ifndef NDEBUG - ID->Name = MCII.getName(Opcode); -#endif LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n'); LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n'); diff --git a/lib/MCA/Pipeline.cpp b/lib/MCA/Pipeline.cpp index fd97ea624b8..4c0e37c9ba7 100644 --- a/lib/MCA/Pipeline.cpp +++ b/lib/MCA/Pipeline.cpp @@ -83,13 +83,13 @@ void Pipeline::appendStage(std::unique_ptr S) { } void Pipeline::notifyCycleBegin() { - LLVM_DEBUG(dbgs() << "[E] Cycle begin: " << Cycles << '\n'); + LLVM_DEBUG(dbgs() << "\n[E] Cycle begin: " << Cycles << '\n'); for (HWEventListener *Listener : Listeners) Listener->onCycleBegin(); } void Pipeline::notifyCycleEnd() { - LLVM_DEBUG(dbgs() << "[E] Cycle end: " << Cycles << "\n\n"); + LLVM_DEBUG(dbgs() << "[E] Cycle end: " << Cycles << "\n"); for (HWEventListener *Listener : Listeners) Listener->onCycleEnd(); } diff --git a/lib/MCA/Stages/ExecuteStage.cpp b/lib/MCA/Stages/ExecuteStage.cpp index 17f7ff7259a..e78327763fa 100644 --- a/lib/MCA/Stages/ExecuteStage.cpp +++ b/lib/MCA/Stages/ExecuteStage.cpp @@ -57,6 +57,7 @@ Error ExecuteStage::issueInstruction(InstRef &IR) { HWS.issueInstruction(IR, Used, Ready); notifyReservedOrReleasedBuffers(IR, /* Reserved */ false); + notifyInstructionIssued(IR, Used); if (IR.getInstruction()->isExecuted()) { notifyInstructionExecuted(IR); @@ -184,7 +185,7 @@ void ExecuteStage::notifyResourceAvailable(const ResourceRef &RR) const { void ExecuteStage::notifyInstructionIssued( const InstRef &IR, - ArrayRef> Used) const { + MutableArrayRef> Used) const { LLVM_DEBUG({ dbgs() << "[E] Instruction Issued: #" << IR << '\n'; for (const std::pair &Resource : Used) { @@ -193,6 +194,11 @@ void ExecuteStage::notifyInstructionIssued( dbgs() << "cycles: " << Resource.second << '\n'; } }); + + // Replace resource masks with valid resource processor IDs. + for (std::pair &Use : Used) + Use.first.first = HWS.getResourceID(Use.first.first); + notifyEvent(HWInstructionIssuedEvent(IR, Used)); } diff --git a/lib/MCA/Support.cpp b/lib/MCA/Support.cpp index 3271bc6bf5b..335953e1048 100644 --- a/lib/MCA/Support.cpp +++ b/lib/MCA/Support.cpp @@ -19,13 +19,18 @@ namespace llvm { namespace mca { +#define DEBUG_TYPE "llvm-mca" + void computeProcResourceMasks(const MCSchedModel &SM, - SmallVectorImpl &Masks) { + MutableArrayRef Masks) { unsigned ProcResourceID = 0; + assert(Masks.size() == SM.getNumProcResourceKinds() && + "Invalid number of elements"); + // Resource at index 0 is the 'InvalidUnit'. Set an invalid mask for it. + Masks[0] = 0; + // Create a unique bitmask for every processor resource unit. - // Skip resource at index 0, since it always references 'InvalidUnit'. - Masks.resize(SM.getNumProcResourceKinds()); for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { const MCProcResourceDesc &Desc = *SM.getProcResource(I); if (Desc.SubUnitsIdxBegin) @@ -46,6 +51,16 @@ void computeProcResourceMasks(const MCSchedModel &SM, } ProcResourceID++; } + +#ifndef NDEBUG + LLVM_DEBUG(dbgs() << "\nProcessor resource masks:" + << "\n"); + for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { + const MCProcResourceDesc &Desc = *SM.getProcResource(I); + LLVM_DEBUG(dbgs() << '[' << I << "] " << Desc.Name << " - " << Masks[I] + << '\n'); + } +#endif } double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth, diff --git a/tools/llvm-mca/Views/SummaryView.cpp b/tools/llvm-mca/Views/SummaryView.cpp index 3a7eb827c47..d8ac709e784 100644 --- a/tools/llvm-mca/Views/SummaryView.cpp +++ b/tools/llvm-mca/Views/SummaryView.cpp @@ -27,7 +27,8 @@ SummaryView::SummaryView(const MCSchedModel &Model, ArrayRef S, unsigned Width) : SM(Model), Source(S), DispatchWidth(Width), LastInstructionIdx(0), TotalCycles(0), NumMicroOps(0), - ProcResourceUsage(Model.getNumProcResourceKinds(), 0) { + ProcResourceUsage(Model.getNumProcResourceKinds(), 0), + ProcResourceMasks(Model.getNumProcResourceKinds()) { computeProcResourceMasks(SM, ProcResourceMasks); }