From 628018073d775a1309c50c5ce9bd7bc7b490a50b Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Mon, 18 Feb 2019 14:15:25 +0000 Subject: [PATCH] [MCA] Correctly update register definitions in the PRF after move elimination. This patch fixes a bug where register writes performed by optimizable register moves were sometimes wrongly treated like partial register updates. Before this patch, llvm-mca wrongly predicted a 1.50 IPC for test reg-move-elimination-6.s (added by this patch). With this patch, llvm-mca correctly updates the register definitions in the PRF, and the IPC for that test is now correctly reported as 2. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354271 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MCA/HardwareUnits/RegisterFile.cpp | 23 ++-- .../X86/BtVer2/reg-move-elimination-6.s | 119 ++++++++++++++++++ 2 files changed, 128 insertions(+), 14 deletions(-) create mode 100644 test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-6.s diff --git a/lib/MCA/HardwareUnits/RegisterFile.cpp b/lib/MCA/HardwareUnits/RegisterFile.cpp index 995c50fc6a8..86a888ea8ca 100644 --- a/lib/MCA/HardwareUnits/RegisterFile.cpp +++ b/lib/MCA/HardwareUnits/RegisterFile.cpp @@ -330,30 +330,25 @@ bool RegisterFile::tryEliminateMove(WriteState &WS, ReadState &RS) { if (RMT.AllowZeroMoveEliminationOnly && !IsZeroMove) return false; - MCPhysReg FromReg = RS.getRegisterID(); - MCPhysReg ToReg = WS.getRegisterID(); - // Construct an alias. - MCPhysReg AliasReg = FromReg; - if (RRIFrom.RenameAs) - AliasReg = RRIFrom.RenameAs; + MCPhysReg AliasedReg = + RRIFrom.RenameAs ? RRIFrom.RenameAs : RS.getRegisterID(); + MCPhysReg AliasReg = RRITo.RenameAs ? 
RRITo.RenameAs : WS.getRegisterID(); - const RegisterRenamingInfo &RMAlias = RegisterMappings[AliasReg].second; + const RegisterRenamingInfo &RMAlias = RegisterMappings[AliasedReg].second; if (RMAlias.AliasRegID) - AliasReg = RMAlias.AliasRegID; + AliasedReg = RMAlias.AliasRegID; - if (AliasReg != ToReg) { - RegisterMappings[ToReg].second.AliasRegID = AliasReg; - for (MCSubRegIterator I(ToReg, &MRI); I.isValid(); ++I) - RegisterMappings[*I].second.AliasRegID = AliasReg; - } + RegisterMappings[AliasReg].second.AliasRegID = AliasedReg; + for (MCSubRegIterator I(AliasReg, &MRI); I.isValid(); ++I) + RegisterMappings[*I].second.AliasRegID = AliasedReg; - RMT.NumMoveEliminated++; if (IsZeroMove) { WS.setWriteZero(); RS.setReadZero(); } WS.setEliminated(); + RMT.NumMoveEliminated++; return true; } diff --git a/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-6.s b/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-6.s new file mode 100644 index 00000000000..d4800943b8b --- /dev/null +++ b/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-6.s @@ -0,0 +1,119 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=3 -register-file-stats < %s | FileCheck %s + +xor %rsi, %rsi +add %rcx, %rcx +add %rcx, %rcx +add %rcx, %rcx +add %rcx, %rcx +mov %esi, %ecx + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 600 +# CHECK-NEXT: Total Cycles: 304 +# CHECK-NEXT: Total uOps: 600 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 1.97 +# CHECK-NEXT: IPC: 1.97 +# CHECK-NEXT: Block RThroughput: 3.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 0 0.50 xorq %rsi, %rsi +# CHECK-NEXT: 1 1 0.50 addq %rcx, %rcx +# 
CHECK-NEXT: 1 1 0.50 addq %rcx, %rcx +# CHECK-NEXT: 1 1 0.50 addq %rcx, %rcx +# CHECK-NEXT: 1 1 0.50 addq %rcx, %rcx +# CHECK-NEXT: 1 1 0.50 movl %esi, %ecx + +# CHECK: Register File statistics: +# CHECK-NEXT: Total number of mappings created: 800 +# CHECK-NEXT: Max number of mappings used: 12 + +# CHECK: * Register File #1 -- JFpuPRF: +# CHECK-NEXT: Number of physical registers: 72 +# CHECK-NEXT: Total number of mappings created: 0 +# CHECK-NEXT: Max number of mappings used: 0 + +# CHECK: * Register File #2 -- JIntegerPRF: +# CHECK-NEXT: Number of physical registers: 64 +# CHECK-NEXT: Total number of mappings created: 800 +# CHECK-NEXT: Max number of mappings used: 12 +# CHECK-NEXT: Number of optimizable moves: 100 +# CHECK-NEXT: Number of moves eliminated: 100 (100.0%) +# CHECK-NEXT: Number of zero moves: 100 (100.0%) +# CHECK-NEXT: Max moves eliminated per cycle: 1 + +# CHECK: Resources: +# CHECK-NEXT: [0] - JALU0 +# CHECK-NEXT: [1] - JALU1 +# CHECK-NEXT: [2] - JDiv +# CHECK-NEXT: [3] - JFPA +# CHECK-NEXT: [4] - JFPM +# CHECK-NEXT: [5] - JFPU0 +# CHECK-NEXT: [6] - JFPU1 +# CHECK-NEXT: [7] - JLAGU +# CHECK-NEXT: [8] - JMul +# CHECK-NEXT: [9] - JSAGU +# CHECK-NEXT: [10] - JSTC +# CHECK-NEXT: [11] - JVALU0 +# CHECK-NEXT: [12] - JVALU1 +# CHECK-NEXT: [13] - JVIMUL + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: 2.00 2.00 - - - - - - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: +# CHECK-NEXT: - - - - - - - - - - - - - - xorq %rsi, %rsi +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - addq %rcx, %rcx +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - addq %rcx, %rcx +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - addq %rcx, %rcx +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - addq %rcx, %rcx +# CHECK-NEXT: - - - - - - - - - - - - - - movl %esi, %ecx + +# CHECK: Timeline view: +# CHECK-NEXT: 012 
+# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DR . . . xorq %rsi, %rsi +# CHECK-NEXT: [0,1] DeER . . . addq %rcx, %rcx +# CHECK-NEXT: [0,2] .DeER. . . addq %rcx, %rcx +# CHECK-NEXT: [0,3] .D=eER . . addq %rcx, %rcx +# CHECK-NEXT: [0,4] . D=eER . . addq %rcx, %rcx +# CHECK-NEXT: [0,5] . D---R . . movl %esi, %ecx +# CHECK-NEXT: [1,0] . D---R . . xorq %rsi, %rsi +# CHECK-NEXT: [1,1] . DeE-R . . addq %rcx, %rcx +# CHECK-NEXT: [1,2] . DeE-R . . addq %rcx, %rcx +# CHECK-NEXT: [1,3] . D=eER . . addq %rcx, %rcx +# CHECK-NEXT: [1,4] . D=eER. . addq %rcx, %rcx +# CHECK-NEXT: [1,5] . D---R. . movl %esi, %ecx +# CHECK-NEXT: [2,0] . .D---R . xorq %rsi, %rsi +# CHECK-NEXT: [2,1] . .DeE-R . addq %rcx, %rcx +# CHECK-NEXT: [2,2] . . DeE-R. addq %rcx, %rcx +# CHECK-NEXT: [2,3] . . D=eER. addq %rcx, %rcx +# CHECK-NEXT: [2,4] . . D=eER addq %rcx, %rcx +# CHECK-NEXT: [2,5] . . D---R movl %esi, %ecx + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 3 0.0 0.0 2.0 xorq %rsi, %rsi +# CHECK-NEXT: 1. 3 1.0 1.0 0.7 addq %rcx, %rcx +# CHECK-NEXT: 2. 3 1.0 0.0 0.7 addq %rcx, %rcx +# CHECK-NEXT: 3. 3 2.0 0.0 0.0 addq %rcx, %rcx +# CHECK-NEXT: 4. 3 2.0 0.0 0.0 addq %rcx, %rcx +# CHECK-NEXT: 5. 3 0.0 0.0 3.0 movl %esi, %ecx -- 2.40.0