return NextGroupID++;
}
- // Instruction executed event handlers.
virtual void onInstructionExecuted(const InstRef &IR);
+ // Loads are tracked by the LDQ (load queue) from dispatch until completion.
+ // Stores are tracked by the STQ (store queue) from dispatch until commitment.
+ // By default we conservatively assume that the LDQ receives a load at
+ // dispatch. Loads leave the LDQ at the retirement stage.
+ virtual void onInstructionRetired(const InstRef &IR);
+
virtual void onInstructionIssued(const InstRef &IR) {
unsigned GroupID = IR.getInstruction()->getLSUTokenID();
Groups[GroupID]->onInstructionIssued(IR);
/// 6. A store has to wait until an older store barrier is fully executed.
unsigned dispatch(const InstRef &IR) override;
- // FIXME: For simplicity, we optimistically assume a similar behavior for
- // store instructions. In practice, store operations don't tend to leave the
- // store queue until they reach the 'Retired' stage (See PR39830).
void onInstructionExecuted(const InstRef &IR) override;
};
#ifndef LLVM_MCA_RETIRE_STAGE_H
#define LLVM_MCA_RETIRE_STAGE_H
+#include "llvm/MCA/HardwareUnits/LSUnit.h"
#include "llvm/MCA/HardwareUnits/RegisterFile.h"
#include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
#include "llvm/MCA/Stages/Stage.h"
// Owner will go away when we move listeners/eventing to the stages.
RetireControlUnit &RCU;
RegisterFile &PRF;
+ LSUnitBase &LSU;
RetireStage(const RetireStage &Other) = delete;
RetireStage &operator=(const RetireStage &Other) = delete;
public:
- RetireStage(RetireControlUnit &R, RegisterFile &F)
- : Stage(), RCU(R), PRF(F) {}
+ RetireStage(RetireControlUnit &R, RegisterFile &F, LSUnitBase &LS)
+ : Stage(), RCU(R), PRF(F), LSU(LS) {}
bool hasWorkToComplete() const override { return !RCU.isEmpty(); }
Error cycleStart() override;
*RCU, *PRF);
auto Execute =
std::make_unique<ExecuteStage>(*HWS, Opts.EnableBottleneckAnalysis);
- auto Retire = std::make_unique<RetireStage>(*RCU, *PRF);
+ auto Retire = std::make_unique<RetireStage>(*RCU, *PRF, *LSU);
// Pass the ownership of all the hardware units to this Context.
addHardwareUnit(std::move(RCU));
}
void LSUnitBase::onInstructionExecuted(const InstRef &IR) {
- const InstrDesc &Desc = IR.getInstruction()->getDesc();
- bool IsALoad = Desc.MayLoad;
- bool IsAStore = Desc.MayStore;
- assert((IsALoad || IsAStore) && "Expected a memory operation!");
-
unsigned GroupID = IR.getInstruction()->getLSUTokenID();
auto It = Groups.find(GroupID);
+ assert(It != Groups.end() && "Instruction not dispatched to the LS unit");
It->second->onInstructionExecuted();
- if (It->second->isExecuted()) {
+ if (It->second->isExecuted())
Groups.erase(It);
- }
+}
+
+void LSUnitBase::onInstructionRetired(const InstRef &IR) {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ bool IsALoad = Desc.MayLoad;
+ bool IsAStore = Desc.MayStore;
+ assert((IsALoad || IsAStore) && "Expected a memory operation!");
if (IsALoad) {
releaseLQSlot();
llvm::SmallVector<unsigned, 4> FreedRegs(PRF.getNumRegisterFiles());
const Instruction &Inst = *IR.getInstruction();
+ // Release the load/store queue entries.
+ if (Inst.isMemOp())
+ LSU.onInstructionRetired(IR);
+
for (const WriteState &WS : Inst.getDefs())
PRF.removeRegisterWrite(WS, FreedRegs);
notifyEvent<HWInstructionEvent>(HWInstructionRetiredEvent(IR, FreedRegs));
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
-# CHECK-NEXT: Total Cycles: 593
+# CHECK-NEXT: Total Cycles: 554
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 4
-# CHECK-NEXT: uOps Per Cycle: 0.67
-# CHECK-NEXT: IPC: 0.67
+# CHECK-NEXT: uOps Per Cycle: 0.72
+# CHECK-NEXT: IPC: 0.72
# CHECK-NEXT: Block RThroughput: 4.0
# CHECK: Instruction Info:
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 0
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
-# CHECK-NEXT: SCHEDQ - Scheduler full: 187 (31.5%)
+# CHECK-NEXT: SCHEDQ - Scheduler full: 55 (9.9%)
# CHECK-NEXT: LQ - Load queue full: 0
-# CHECK-NEXT: SQ - Store queue full: 342 (57.7%)
+# CHECK-NEXT: SQ - Store queue full: 437 (78.9%)
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
-# CHECK-NEXT: 0, 403 (68.0%)
-# CHECK-NEXT: 1, 90 (15.2%)
-# CHECK-NEXT: 2, 2 (0.3%)
-# CHECK-NEXT: 3, 86 (14.5%)
-# CHECK-NEXT: 4, 12 (2.0%)
+# CHECK-NEXT: 0, 365 (65.9%)
+# CHECK-NEXT: 1, 88 (15.9%)
+# CHECK-NEXT: 2, 3 (0.5%)
+# CHECK-NEXT: 3, 86 (15.5%)
+# CHECK-NEXT: 4, 12 (2.2%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [# issued], [# cycles]
-# CHECK-NEXT: 0, 292 (49.2%)
-# CHECK-NEXT: 1, 202 (34.1%)
-# CHECK-NEXT: 2, 99 (16.7%)
+# CHECK-NEXT: 0, 253 (45.7%)
+# CHECK-NEXT: 1, 202 (36.5%)
+# CHECK-NEXT: 2, 99 (17.9%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: [1] Resource name.
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions:
# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - - - 3.00 - - - - 1.00 movd %mm0, (%rax)
-# CHECK-NEXT: 0.36 2.64 - - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - movd (%rcx), %mm1
-# CHECK-NEXT: 2.64 0.36 - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - - movd (%rdx), %mm2
+# CHECK-NEXT: 1.53 1.47 - - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - movd (%rcx), %mm1
+# CHECK-NEXT: 1.47 1.53 - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - - movd (%rdx), %mm2
# CHECK-NEXT: 1.00 - - - - - - - - - - - - 1.00 - - 3.00 - - - - - 1.00 movd %mm3, (%rbx)
# CHECK: Timeline view:
# CHECK-NEXT: RAT - Register unavailable: 0
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
-# CHECK-NEXT: LQ - Load queue full: 353 (86.9%)
+# CHECK-NEXT: LQ - Load queue full: 354 (87.2%)
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK-NEXT: [4] Total number of buffer entries.
# CHECK: [1] [2] [3] [4]
-# CHECK-NEXT: PdEX 32 36 40
+# CHECK-NEXT: PdEX 31 34 40
# CHECK-NEXT: PdFPU 0 0 64
-# CHECK-NEXT: PdLoad 37 40 40
+# CHECK-NEXT: PdLoad 36 40 40
# CHECK-NEXT: PdStore 0 0 24
# CHECK: Resources:
# CHECK-NEXT: RAT - Register unavailable: 0
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
-# CHECK-NEXT: LQ - Load queue full: 353 (86.9%)
+# CHECK-NEXT: LQ - Load queue full: 354 (87.2%)
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK-NEXT: [4] Total number of buffer entries.
# CHECK: [1] [2] [3] [4]
-# CHECK-NEXT: PdEX 32 36 40
+# CHECK-NEXT: PdEX 31 34 40
# CHECK-NEXT: PdFPU 0 0 64
-# CHECK-NEXT: PdLoad 37 40 40
+# CHECK-NEXT: PdLoad 36 40 40
# CHECK-NEXT: PdStore 0 0 24
# CHECK: Resources:
# CHECK-NEXT: RAT - Register unavailable: 0
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
-# CHECK-NEXT: LQ - Load queue full: 353 (86.9%)
+# CHECK-NEXT: LQ - Load queue full: 354 (87.2%)
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK-NEXT: [4] Total number of buffer entries.
# CHECK: [1] [2] [3] [4]
-# CHECK-NEXT: PdEX 32 36 40
+# CHECK-NEXT: PdEX 31 34 40
# CHECK-NEXT: PdFPU 0 0 64
-# CHECK-NEXT: PdLoad 37 40 40
+# CHECK-NEXT: PdLoad 36 40 40
# CHECK-NEXT: PdStore 0 0 24
# CHECK: Resources:
# CHECK-NEXT: RAT - Register unavailable: 0
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
-# CHECK-NEXT: LQ - Load queue full: 353 (86.9%)
+# CHECK-NEXT: LQ - Load queue full: 354 (87.2%)
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK-NEXT: [4] Total number of buffer entries.
# CHECK: [1] [2] [3] [4]
-# CHECK-NEXT: PdEX 32 36 40
+# CHECK-NEXT: PdEX 31 34 40
# CHECK-NEXT: PdFPU 0 0 64
-# CHECK-NEXT: PdLoad 37 40 40
+# CHECK-NEXT: PdLoad 36 40 40
# CHECK-NEXT: PdStore 0 0 24
# CHECK: Resources:
# CHECK-NEXT: RAT - Register unavailable: 0
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
-# CHECK-NEXT: LQ - Load queue full: 532 (87.9%)
+# CHECK-NEXT: LQ - Load queue full: 533 (88.1%)
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK-NEXT: [4] Total number of buffer entries.
# CHECK: [1] [2] [3] [4]
-# CHECK-NEXT: PdEX 34 38 40
-# CHECK-NEXT: PdFPU 34 38 64
+# CHECK-NEXT: PdEX 33 36 40
+# CHECK-NEXT: PdFPU 33 36 64
# CHECK-NEXT: PdLoad 37 40 40
# CHECK-NEXT: PdStore 0 0 24
# CHECK-NEXT: RAT - Register unavailable: 0
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
-# CHECK-NEXT: LQ - Load queue full: 532 (87.9%)
+# CHECK-NEXT: LQ - Load queue full: 533 (88.1%)
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK-NEXT: [4] Total number of buffer entries.
# CHECK: [1] [2] [3] [4]
-# CHECK-NEXT: PdEX 34 38 40
-# CHECK-NEXT: PdFPU 34 38 64
+# CHECK-NEXT: PdEX 33 36 40
+# CHECK-NEXT: PdFPU 33 36 64
# CHECK-NEXT: PdLoad 37 40 40
# CHECK-NEXT: PdStore 0 0 24
# CHECK-NEXT: RAT - Register unavailable: 0
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
-# CHECK-NEXT: LQ - Load queue full: 344 (56.9%)
+# CHECK-NEXT: LQ - Load queue full: 345 (57.0%)
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK-NEXT: [4] Total number of buffer entries.
# CHECK: [1] [2] [3] [4]
-# CHECK-NEXT: PdEX 33 38 40
-# CHECK-NEXT: PdFPU 33 38 64
-# CHECK-NEXT: PdLoad 37 40 40
+# CHECK-NEXT: PdEX 33 36 40
+# CHECK-NEXT: PdFPU 33 36 64
+# CHECK-NEXT: PdLoad 36 40 40
# CHECK-NEXT: PdStore 0 0 24
# CHECK: Resources:
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
# CHECK-NEXT: LQ - Load queue full: 0
-# CHECK-NEXT: SQ - Store queue full: 370 (91.8%)
+# CHECK-NEXT: SQ - Store queue full: 371 (92.1%)
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
-# CHECK-NEXT: 0, 25 (6.2%)
-# CHECK-NEXT: 1, 370 (91.8%)
-# CHECK-NEXT: 2, 1 (0.2%)
+# CHECK-NEXT: 0, 24 (6.0%)
+# CHECK-NEXT: 1, 372 (92.3%)
# CHECK-NEXT: 4, 7 (1.7%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [4] Total number of buffer entries.
# CHECK: [1] [2] [3] [4]
-# CHECK-NEXT: PdEX 22 23 40
+# CHECK-NEXT: PdEX 21 22 40
# CHECK-NEXT: PdFPU 0 0 64
# CHECK-NEXT: PdLoad 0 0 40
-# CHECK-NEXT: PdStore 23 24 24
+# CHECK-NEXT: PdStore 22 23 24
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
# CHECK-NEXT: LQ - Load queue full: 0
-# CHECK-NEXT: SQ - Store queue full: 370 (91.8%)
+# CHECK-NEXT: SQ - Store queue full: 371 (92.1%)
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
-# CHECK-NEXT: 0, 25 (6.2%)
-# CHECK-NEXT: 1, 370 (91.8%)
-# CHECK-NEXT: 2, 1 (0.2%)
+# CHECK-NEXT: 0, 24 (6.0%)
+# CHECK-NEXT: 1, 372 (92.3%)
# CHECK-NEXT: 4, 7 (1.7%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [4] Total number of buffer entries.
# CHECK: [1] [2] [3] [4]
-# CHECK-NEXT: PdEX 22 23 40
+# CHECK-NEXT: PdEX 21 22 40
# CHECK-NEXT: PdFPU 0 0 64
# CHECK-NEXT: PdLoad 0 0 40
-# CHECK-NEXT: PdStore 23 24 24
+# CHECK-NEXT: PdStore 22 23 24
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
# CHECK-NEXT: LQ - Load queue full: 0
-# CHECK-NEXT: SQ - Store queue full: 370 (91.8%)
+# CHECK-NEXT: SQ - Store queue full: 371 (92.1%)
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
-# CHECK-NEXT: 0, 25 (6.2%)
-# CHECK-NEXT: 1, 370 (91.8%)
-# CHECK-NEXT: 2, 1 (0.2%)
+# CHECK-NEXT: 0, 24 (6.0%)
+# CHECK-NEXT: 1, 372 (92.3%)
# CHECK-NEXT: 4, 7 (1.7%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [4] Total number of buffer entries.
# CHECK: [1] [2] [3] [4]
-# CHECK-NEXT: PdEX 22 23 40
+# CHECK-NEXT: PdEX 21 22 40
# CHECK-NEXT: PdFPU 0 0 64
# CHECK-NEXT: PdLoad 0 0 40
-# CHECK-NEXT: PdStore 23 24 24
+# CHECK-NEXT: PdStore 22 23 24
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
# CHECK-NEXT: LQ - Load queue full: 0
-# CHECK-NEXT: SQ - Store queue full: 370 (91.8%)
+# CHECK-NEXT: SQ - Store queue full: 371 (92.1%)
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
-# CHECK-NEXT: 0, 25 (6.2%)
-# CHECK-NEXT: 1, 370 (91.8%)
-# CHECK-NEXT: 2, 1 (0.2%)
+# CHECK-NEXT: 0, 24 (6.0%)
+# CHECK-NEXT: 1, 372 (92.3%)
# CHECK-NEXT: 4, 7 (1.7%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [4] Total number of buffer entries.
# CHECK: [1] [2] [3] [4]
-# CHECK-NEXT: PdEX 22 23 40
+# CHECK-NEXT: PdEX 21 22 40
# CHECK-NEXT: PdFPU 0 0 64
# CHECK-NEXT: PdLoad 0 0 40
-# CHECK-NEXT: PdStore 23 24 24
+# CHECK-NEXT: PdStore 22 23 24
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
# CHECK-NEXT: LQ - Load queue full: 0
-# CHECK-NEXT: SQ - Store queue full: 747 (93.0%)
+# CHECK-NEXT: SQ - Store queue full: 748 (93.2%)
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [4] Total number of buffer entries.
# CHECK: [1] [2] [3] [4]
-# CHECK-NEXT: PdEX 22 23 40
-# CHECK-NEXT: PdFPU 22 23 64
+# CHECK-NEXT: PdEX 21 23 40
+# CHECK-NEXT: PdFPU 21 23 64
# CHECK-NEXT: PdLoad 0 0 40
-# CHECK-NEXT: PdStore 23 24 24
+# CHECK-NEXT: PdStore 22 24 24
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 0
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
-# CHECK-NEXT: SCHEDQ - Scheduler full: 185 (30.7%)
+# CHECK-NEXT: SCHEDQ - Scheduler full: 0
# CHECK-NEXT: LQ - Load queue full: 0
-# CHECK-NEXT: SQ - Store queue full: 372 (61.8%)
+# CHECK-NEXT: SQ - Store queue full: 559 (92.9%)
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
-# CHECK-NEXT: 0, 223 (37.0%)
-# CHECK-NEXT: 1, 372 (61.8%)
-# CHECK-NEXT: 4, 7 (1.2%)
+# CHECK-NEXT: 0, 222 (36.9%)
+# CHECK-NEXT: 1, 373 (62.0%)
+# CHECK-NEXT: 3, 1 (0.2%)
+# CHECK-NEXT: 4, 6 (1.0%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [# issued], [# cycles]
# CHECK-NEXT: [4] Total number of buffer entries.
# CHECK: [1] [2] [3] [4]
-# CHECK-NEXT: PdEX 22 24 40
-# CHECK-NEXT: PdFPU 22 24 64
+# CHECK-NEXT: PdEX 21 23 40
+# CHECK-NEXT: PdFPU 21 23 64
# CHECK-NEXT: PdLoad 0 0 40
-# CHECK-NEXT: PdStore 23 24 24
+# CHECK-NEXT: PdStore 22 24 24
# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 0
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
-# CHECK-NEXT: SCHEDQ - Scheduler full: 5963 (83.2%)
+# CHECK-NEXT: SCHEDQ - Scheduler full: 5777 (80.6%)
# CHECK-NEXT: LQ - Load queue full: 0
-# CHECK-NEXT: SQ - Store queue full: 374 (5.2%)
+# CHECK-NEXT: SQ - Store queue full: 561 (7.8%)
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: