From 60eba9fc37f891867b764d77ea244c2448ade683 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Thu, 28 Mar 2019 13:40:34 +0000 Subject: [PATCH] [X86] AMD Piledriver (BdVer2): fine-tune some latencies Based on llvm-exegesis measurements. Now that llvm-exegesis is ~2 magnitudes faster, and is a bit smarter, it is now possible to continue cleanup of the scheduler model. With this, there are no more latency inconsistencies for the opcodes that produce stable measurements, and only a few inconsistencies for unstable measurements (MMX_* opcodes, opcodes that llvm-exegesis measures by chaining - CMP, TEST, BT, SETcc, CVT, MOV, etc.) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@357169 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ScheduleBdVer2.td | 78 +++++++++------ .../X86/BdVer2/int-to-fpu-forwarding-2.s | 32 +++---- test/tools/llvm-mca/X86/BdVer2/pipes-fpu.s | 34 +++---- .../X86/BdVer2/reg-move-elimination-2.s | 94 +++++++++---------- .../X86/BdVer2/reg-move-elimination-3.s | 76 +++++++-------- .../llvm-mca/X86/BdVer2/resources-avx1.s | 50 +++++----- .../tools/llvm-mca/X86/BdVer2/resources-mmx.s | 8 +- .../llvm-mca/X86/BdVer2/resources-sse1.s | 8 +- .../llvm-mca/X86/BdVer2/resources-sse2.s | 24 ++--- .../llvm-mca/X86/BdVer2/resources-sse41.s | 8 +- .../llvm-mca/X86/BdVer2/resources-sse42.s | 26 ++--- .../llvm-mca/X86/BdVer2/resources-x86_64.s | 32 +++---- .../tools/llvm-mca/X86/BdVer2/resources-xop.s | 2 +- 13 files changed, 247 insertions(+), 225 deletions(-) diff --git a/lib/Target/X86/X86ScheduleBdVer2.td b/lib/Target/X86/X86ScheduleBdVer2.td index 8e8fc6fd1ff..82920ad43c6 100644 --- a/lib/Target/X86/X86ScheduleBdVer2.td +++ b/lib/Target/X86/X86ScheduleBdVer2.td @@ -386,14 +386,8 @@ def PdWriteCMPXCHG16B : SchedWriteRes<[PdEX1]> { } def : InstRW<[PdWriteCMPXCHG16B], (instrs CMPXCHG16B)>; -def PdWriteXCHG16rr : SchedWriteRes<[PdEX1]> { - let Latency = 2; - let NumMicroOps = 2; -} -def : InstRW<[PdWriteXCHG16rr], (instrs XCHG16rr)>; - def PdWriteXADD : SchedWriteRes<[PdEX1]> { - let Latency = 2; + let Latency = 1; let NumMicroOps = 4; } def : InstRW<[PdWriteXADD], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr)>; @@ -426,7 +420,7 @@ defm : PdWriteResExPair; defm : PdWriteResExPair; -defm : PdWriteResExPair; +defm : PdWriteResExPair; def PdWriteCRC32r32r16 : SchedWriteRes<[PdEX01]> { let Latency = 5; @@ -547,11 +541,17 @@ def PdWriteRCR16ri : SchedWriteRes<[PdEX01]> { } def : InstRW<[PdWriteRCR16ri], (instrs RCR16ri)>; -def PdWriteRCL32rCLRCL64rCL : SchedWriteRes<[PdEX01]> { +def PdWriteRCL32rCL : SchedWriteRes<[PdEX01]> { let Latency = 7; let NumMicroOps = 17; } -def : InstRW<[PdWriteRCL32rCLRCL64rCL], (instrs RCL32rCL, RCL64rCL)>; +def : InstRW<[PdWriteRCL32rCL], (instrs RCL32rCL)>; + +def PdWriteRCL64rCL : SchedWriteRes<[PdEX01]> { + let Latency = 8; + let NumMicroOps = 17; +} +def : InstRW<[PdWriteRCL64rCL], (instrs RCL64rCL)>; def PdWriteRCR64rCL : SchedWriteRes<[PdEX01]> { let Latency = 7; @@ -597,8 +597,8 @@ def PdWriteRCL8ri : SchedWriteRes<[PdEX01]> { def : InstRW<[PdWriteRCL8ri], (instrs RCL8ri)>; // SHLD/SHRD. -defm : PdWriteRes; -defm : PdWriteRes; +defm : PdWriteRes; +defm : PdWriteRes; def PdWriteSHLD32rri8SHRD16rri8 : SchedWriteRes<[PdEX01]> { let Latency = 3; @@ -608,7 +608,7 @@ def PdWriteSHLD32rri8SHRD16rri8 : SchedWriteRes<[PdEX01]> { def : InstRW<[PdWriteSHLD32rri8SHRD16rri8 ], (instrs SHLD32rri8, SHRD16rri8)>; def PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL : SchedWriteRes<[PdEX01]> { - let Latency = 4; + let Latency = 3; let ResourceCycles = [8]; let NumMicroOps = 7; } @@ -715,7 +715,7 @@ defm : PdWriteResYMMPair; def PdWriteVDPPSrri : SchedWriteRes<[PdFPU1, PdFPFMA]> { - let Latency = 25; + let Latency = 27; let ResourceCycles = [1, 3]; let NumMicroOps = 17; } @@ -875,11 +875,11 @@ defm : X86WriteResPairUnsupported; defm : PdWriteResXMMPair; // FIXME: .Folded version is one NumMicroOp *less*.. -def WriteCVTSI642SDrr : SchedWriteRes<[PdFPU1, PdFPSTO]> { +def PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr : SchedWriteRes<[PdFPU1, PdFPSTO]> { let Latency = 13; let NumMicroOps = 2; } -def : InstRW<[WriteCVTSI642SDrr], (instrs CVTSI642SDrr, CVTSI642SSrr)>; +def : InstRW<[PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr], (instrs CVTSI642SDrr, CVTSI642SSrr, CVTSI2SDrr, CVTSI2SSrr)>; defm : PdWriteResXMMPair; defm : PdWriteResYMMPair; @@ -952,11 +952,20 @@ defm : PdWriteRes; defm : PdWriteRes; -defm : PdWriteRes; +defm : PdWriteRes; defm : PdWriteRes; -defm : PdWriteRes; -defm : PdWriteRes; +def PdWriteMOVDQArr : SchedWriteRes<[PdFPU01, PdFPMAL]> { +} +def : InstRW<[PdWriteMOVDQArr], (instrs MOVDQArr)>; + +def PdWriteMOVQ2DQrr : SchedWriteRes<[PdFPU01, PdFPMAL]> { + let Latency = 4; +} +def : InstRW<[PdWriteMOVQ2DQrr], (instrs MMX_MOVQ2DQrr)>; + +defm : PdWriteRes; +defm : PdWriteRes; defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; @@ -982,17 +991,24 @@ defm : PdWriteResXMMPair; defm : X86WriteResPairUnsupported; -def JWriteVPMACS : SchedWriteRes<[PdFPU0, PdFPU01, PdFPMMA, PdFPMAL]> { +def PdWriteVPMACS : SchedWriteRes<[PdFPU0, PdFPU01, PdFPMMA, PdFPMAL]> { let Latency = 4; let ResourceCycles = [2, 1, 2, 1]; } -def : InstRW<[JWriteVPMACS], (instrs VPMACSDQHrr, VPMACSDQLrr, VPMACSSDQHrr, +def : InstRW<[PdWriteVPMACS], (instrs VPMACSDQHrr, VPMACSDQLrr, VPMACSSDQHrr, VPMACSSDQLrr)>; defm : PdWriteResXMMPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; +def PdWriteVMPSADBW : SchedWriteRes<[PdFPU0, PdFPMMA]> { + let Latency = 8; + let ResourceCycles = [1, 2]; + let NumMicroOps = 9; +} +def : InstRW<[PdWriteVMPSADBW], (instrs VMPSADBWrri)>; + defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; defm : X86WriteResPairUnsupported; @@ -1010,6 +1026,12 @@ defm : PdWriteResXMMPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; +def PdWriteVPPERM : SchedWriteRes<[PdFPU01, PdFPMAL]> { + let Latency = 2; + let ResourceCycles = [1, 4]; +} +def : InstRW<[PdWriteVPPERM], (instrs VPPERMrrr, VPPERMrrr_REV)>; + defm : PdWriteResXMMPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; @@ -1041,7 +1063,7 @@ defm : X86WriteResPairUnsupported; defm : PdWriteRes; defm : PdWriteRes; -defm : PdWriteRes; +defm : PdWriteRes; defm : PdWriteRes; def PdWriteEXTRQ : SchedWriteRes<[PdFPU01, PdFPMAL]> { @@ -1053,19 +1075,19 @@ def : InstRW<[PdWriteEXTRQ], (instrs EXTRQ, EXTRQI)>; // SSE42 String instructions. //////////////////////////////////////////////////////////////////////////////// -defm : PdWriteResXMMPair; -defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; -defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; defm : PdWriteResXMMPair; //////////////////////////////////////////////////////////////////////////////// // MOVMSK Instructions. //////////////////////////////////////////////////////////////////////////////// -defm : PdWriteRes; +defm : PdWriteRes; -defm : PdWriteRes; +defm : PdWriteRes; defm : X86WriteResUnsupported; // defm : X86WriteResUnsupported; @@ -1113,7 +1135,7 @@ def : InstRW<[WritePHAdd.Folded], (instrs PHADDDrm, PHSUBDrm, defm : PdWriteResXMMPair; def PdWriteVPCLMULQDQrr : SchedWriteRes<[PdFPU0, PdFPMMA]> { - let Latency = 13; + let Latency = 12; let NumMicroOps = 6; } def : InstRW<[PdWriteVPCLMULQDQrr], (instrs VPCLMULQDQrr)>; diff --git a/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s b/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s index 2b31da317fb..f5ef23e3ace 100644 --- a/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s +++ b/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s @@ -141,12 +141,12 @@ movq %rcx, %xmm0 # CHECK: Iterations: 500 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 506 +# CHECK-NEXT: Total Cycles: 515 # CHECK-NEXT: Total uOps: 1000 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.98 -# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: uOps Per Cycle: 1.94 +# CHECK-NEXT: IPC: 0.97 # CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Instruction Info: @@ -158,7 +158,7 @@ movq %rcx, %xmm0 # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 14 1.00 cvtsi2ssl %ecx, %xmm0 +# CHECK-NEXT: 2 13 1.00 cvtsi2ssl %ecx, %xmm0 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -197,12 +197,12 @@ movq %rcx, %xmm0 # CHECK: Iterations: 500 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 506 +# CHECK-NEXT: Total Cycles: 515 # CHECK-NEXT: Total uOps: 1000 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.98 -# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: uOps Per Cycle: 1.94 +# CHECK-NEXT: IPC: 0.97 # CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Instruction Info: @@ -214,7 +214,7 @@ movq %rcx, %xmm0 # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 14 1.00 cvtsi2sdl %ecx, %xmm0 +# CHECK-NEXT: 2 13 1.00 cvtsi2sdl %ecx, %xmm0 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -253,12 +253,12 @@ movq %rcx, %xmm0 # CHECK: Iterations: 500 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 262 +# CHECK-NEXT: Total Cycles: 263 # CHECK-NEXT: Total uOps: 1000 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 3.82 -# CHECK-NEXT: IPC: 1.91 +# CHECK-NEXT: uOps Per Cycle: 3.80 +# CHECK-NEXT: IPC: 1.90 # CHECK-NEXT: Block RThroughput: 0.5 # CHECK: Instruction Info: @@ -270,7 +270,7 @@ movq %rcx, %xmm0 # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 10 0.50 movd %ecx, %xmm0 +# CHECK-NEXT: 2 11 0.50 movd %ecx, %xmm0 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -309,12 +309,12 @@ movq %rcx, %xmm0 # CHECK: Iterations: 500 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 262 +# CHECK-NEXT: Total Cycles: 263 # CHECK-NEXT: Total uOps: 1000 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 3.82 -# CHECK-NEXT: IPC: 1.91 +# CHECK-NEXT: uOps Per Cycle: 3.80 +# CHECK-NEXT: IPC: 1.90 # CHECK-NEXT: Block RThroughput: 0.5 # CHECK: Instruction Info: @@ -326,7 +326,7 @@ movq %rcx, %xmm0 # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 10 0.50 movq %rcx, %xmm0 +# CHECK-NEXT: 2 11 0.50 movq %rcx, %xmm0 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 diff --git a/test/tools/llvm-mca/X86/BdVer2/pipes-fpu.s b/test/tools/llvm-mca/X86/BdVer2/pipes-fpu.s index 2cd66ae1630..d17d13184e7 100644 --- a/test/tools/llvm-mca/X86/BdVer2/pipes-fpu.s +++ b/test/tools/llvm-mca/X86/BdVer2/pipes-fpu.s @@ -39,7 +39,7 @@ vsqrtps %ymm0, %ymm2 # CHECK-NEXT: 1 5 2.00 vpmulld %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 2 0.50 vpand %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 4 1.00 vcvttps2dq %xmm0, %xmm2 -# CHECK-NEXT: 6 13 1.00 vpclmulqdq $0, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 6 12 1.00 vpclmulqdq $0, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 5 1.00 vaddps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 9 10.50 vsqrtps %xmm0, %xmm2 # CHECK-NEXT: 2 5 2.00 vaddps %ymm0, %ymm1, %ymm2 @@ -92,15 +92,15 @@ vsqrtps %ymm0, %ymm2 # CHECK: [0,0] DeeeeeER . . . . . . . . . . . vpmulld %xmm0, %xmm1, %xmm2 # CHECK-NEXT: [0,1] D=eeE--R . . . . . . . . . . . vpand %xmm0, %xmm1, %xmm2 # CHECK-NEXT: [0,2] D==eeeeER . . . . . . . . . . . vcvttps2dq %xmm0, %xmm2 -# CHECK-NEXT: [0,3] .D=eeeeeeeeeeeeeER . . . . . . . . . vpclmulqdq $0, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [0,4] . D=eeeeeE-------R . . . . . . . . . vaddps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [0,5] . D=eeeeeeeeeE---R . . . . . . . . . vsqrtps %xmm0, %xmm2 -# CHECK-NEXT: [0,6] . D=eeeeeE------R . . . . . . . . . vaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: [0,7] . D==eeeeeeeeeE--R . . . . . . . . . vsqrtps %ymm0, %ymm2 -# CHECK-NEXT: [1,0] . D===eeeeeE----R . . . . . . . . . vpmulld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [1,1] . DeeE----------R . . . . . . . . . vpand %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [1,2] . D====eeeeE----R . . . . . . . . . vcvttps2dq %xmm0, %xmm2 -# CHECK-NEXT: [1,3] . D=eeeeeeeeeeeeeER . . . . . . . . vpclmulqdq $0, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [0,3] .D=eeeeeeeeeeeeER . . . . . . . . . vpclmulqdq $0, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [0,4] . D=eeeeeE------R . . . . . . . . . vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [0,5] . D=eeeeeeeeeE--R . . . . . . . . . vsqrtps %xmm0, %xmm2 +# CHECK-NEXT: [0,6] . D=eeeeeE-----R . . . . . . . . . vaddps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: [0,7] . D==eeeeeeeeeE-R . . . . . . . . . vsqrtps %ymm0, %ymm2 +# CHECK-NEXT: [1,0] . D===eeeeeE---R . . . . . . . . . vpmulld %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [1,1] . DeeE---------R . . . . . . . . . vpand %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [1,2] . D====eeeeE---R . . . . . . . . . vcvttps2dq %xmm0, %xmm2 +# CHECK-NEXT: [1,3] . D=eeeeeeeeeeeeER . . . . . . . . vpclmulqdq $0, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: [1,4] . .D==================eeeeeER . . . . . . vaddps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: [1,5] . .D===================eeeeeeeeeER . . . . . vsqrtps %xmm0, %xmm2 # CHECK-NEXT: [1,6] . . D=======================================eeeeeER . . vaddps %ymm0, %ymm1, %ymm2 @@ -113,11 +113,11 @@ vsqrtps %ymm0, %ymm2 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 2 2.5 2.5 2.0 vpmulld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1. 2 1.5 1.5 6.0 vpand %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 2. 2 4.0 4.0 2.0 vcvttps2dq %xmm0, %xmm2 +# CHECK-NEXT: 0. 2 2.5 2.5 1.5 vpmulld %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1. 2 1.5 1.5 5.5 vpand %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2. 2 4.0 4.0 1.5 vcvttps2dq %xmm0, %xmm2 # CHECK-NEXT: 3. 2 2.0 2.0 0.0 vpclmulqdq $0, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 4. 2 10.5 10.5 3.5 vaddps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 5. 2 11.0 11.0 1.5 vsqrtps %xmm0, %xmm2 -# CHECK-NEXT: 6. 2 21.0 21.0 3.0 vaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 7. 2 22.0 22.0 1.0 vsqrtps %ymm0, %ymm2 +# CHECK-NEXT: 4. 2 10.5 10.5 3.0 vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 5. 2 11.0 11.0 1.0 vsqrtps %xmm0, %xmm2 +# CHECK-NEXT: 6. 2 21.0 21.0 2.5 vaddps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 7. 2 22.0 22.0 0.5 vsqrtps %ymm0, %ymm2 diff --git a/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-2.s b/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-2.s index 2263f592425..d39b525da3d 100644 --- a/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-2.s +++ b/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-2.s @@ -14,12 +14,12 @@ movdqu %xmm5, %xmm0 # CHECK: Iterations: 3 # CHECK-NEXT: Instructions: 27 -# CHECK-NEXT: Total Cycles: 18 +# CHECK-NEXT: Total Cycles: 16 # CHECK-NEXT: Total uOps: 27 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.50 -# CHECK-NEXT: IPC: 1.50 +# CHECK-NEXT: uOps Per Cycle: 1.69 +# CHECK-NEXT: IPC: 1.69 # CHECK-NEXT: Block RThroughput: 3.5 # CHECK: Instruction Info: @@ -38,8 +38,8 @@ movdqu %xmm5, %xmm0 # CHECK-NEXT: 1 1 0.50 movups %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.50 movapd %xmm2, %xmm3 # CHECK-NEXT: 1 1 0.50 movupd %xmm3, %xmm4 -# CHECK-NEXT: 1 2 0.50 movdqa %xmm4, %xmm5 -# CHECK-NEXT: 1 2 0.50 movdqu %xmm5, %xmm0 +# CHECK-NEXT: 1 1 0.50 movdqa %xmm4, %xmm5 +# CHECK-NEXT: 1 1 0.50 movdqu %xmm5, %xmm0 # CHECK: Register File statistics: # CHECK-NEXT: Total number of mappings created: 21 @@ -87,46 +87,46 @@ movdqu %xmm5, %xmm0 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions: # CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - pxor %mm0, %mm0 -# CHECK-NEXT: - - - - - - - - - - 0.67 0.33 - - - 1.00 - - - - - - - movq %mm0, %mm1 +# CHECK-NEXT: - - - - - - - - - - 0.33 0.67 - - - 1.00 - - - - - - - movq %mm0, %mm1 # CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - xorps %xmm0, %xmm0 # CHECK-NEXT: - - - - - - - - - 1.00 - - - - 0.33 0.67 - - - - - - - movaps %xmm0, %xmm1 -# CHECK-NEXT: - - - - - - - - 1.00 - - - - - 0.33 0.67 - - - - - - - movups %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - - 1.00 - - - - 1.00 - - - - - - - - movapd %xmm2, %xmm3 -# CHECK-NEXT: - - - - - - - - 1.00 - - - - - 0.33 0.67 - - - - - - - movupd %xmm3, %xmm4 +# CHECK-NEXT: - - - - - - - - 1.00 - - - - - - 1.00 - - - - - - - movups %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - - - 0.67 0.33 - - - - - - - movapd %xmm2, %xmm3 +# CHECK-NEXT: - - - - - - - - 1.00 - - - - - 0.67 0.33 - - - - - - - movupd %xmm3, %xmm4 # CHECK-NEXT: - - - - - - - - - - 0.67 0.33 - - 1.00 - - - - - - - - movdqa %xmm4, %xmm5 -# CHECK-NEXT: - - - - - - - - - - - 1.00 - - 0.33 0.67 - - - - - - - movdqu %xmm5, %xmm0 +# CHECK-NEXT: - - - - - - - - - - 0.33 0.67 - - 0.67 0.33 - - - - - - - movdqu %xmm5, %xmm0 # CHECK: Timeline view: -# CHECK-NEXT: 01234567 +# CHECK-NEXT: 012345 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DR . . . . pxor %mm0, %mm0 -# CHECK-NEXT: [0,1] DeeER. . . . movq %mm0, %mm1 -# CHECK-NEXT: [0,2] D---R. . . . xorps %xmm0, %xmm0 -# CHECK-NEXT: [0,3] DeE-R. . . . movaps %xmm0, %xmm1 -# CHECK-NEXT: [0,4] .DeER. . . . movups %xmm1, %xmm2 -# CHECK-NEXT: [0,5] .D=eER . . . movapd %xmm2, %xmm3 -# CHECK-NEXT: [0,6] .D==eER . . . movupd %xmm3, %xmm4 -# CHECK-NEXT: [0,7] .D===eeER . . . movdqa %xmm4, %xmm5 -# CHECK-NEXT: [0,8] . D====eeER . . movdqu %xmm5, %xmm0 -# CHECK-NEXT: [1,0] . D-------R . . pxor %mm0, %mm0 -# CHECK-NEXT: [1,1] . DeeE----R . . movq %mm0, %mm1 -# CHECK-NEXT: [1,2] . D-------R . . xorps %xmm0, %xmm0 -# CHECK-NEXT: [1,3] . DeE-----R . . movaps %xmm0, %xmm1 -# CHECK-NEXT: [1,4] . D=eE----R . . movups %xmm1, %xmm2 -# CHECK-NEXT: [1,5] . D==eE---R . . movapd %xmm2, %xmm3 -# CHECK-NEXT: [1,6] . D===eE--R . . movupd %xmm3, %xmm4 -# CHECK-NEXT: [1,7] . D===eeE-R . . movdqa %xmm4, %xmm5 -# CHECK-NEXT: [1,8] . D=====eeER . . movdqu %xmm5, %xmm0 -# CHECK-NEXT: [2,0] . D--------R . . pxor %mm0, %mm0 -# CHECK-NEXT: [2,1] . D=eeE----R . . movq %mm0, %mm1 -# CHECK-NEXT: [2,2] . D-------R . . xorps %xmm0, %xmm0 -# CHECK-NEXT: [2,3] . D==eE----R. . movaps %xmm0, %xmm1 -# CHECK-NEXT: [2,4] . D===eE---R. . movups %xmm1, %xmm2 -# CHECK-NEXT: [2,5] . D====eE--R. . movapd %xmm2, %xmm3 -# CHECK-NEXT: [2,6] . .D====eE-R. . movupd %xmm3, %xmm4 -# CHECK-NEXT: [2,7] . .D=====eeER . movdqa %xmm4, %xmm5 -# CHECK-NEXT: [2,8] . .D=======eeER movdqu %xmm5, %xmm0 +# CHECK: [0,0] DR . . . pxor %mm0, %mm0 +# CHECK-NEXT: [0,1] DeeER. . . movq %mm0, %mm1 +# CHECK-NEXT: [0,2] D---R. . . xorps %xmm0, %xmm0 +# CHECK-NEXT: [0,3] DeE-R. . . movaps %xmm0, %xmm1 +# CHECK-NEXT: [0,4] .DeER. . . movups %xmm1, %xmm2 +# CHECK-NEXT: [0,5] .D=eER . . movapd %xmm2, %xmm3 +# CHECK-NEXT: [0,6] .D==eER . . movupd %xmm3, %xmm4 +# CHECK-NEXT: [0,7] .D===eER . . movdqa %xmm4, %xmm5 +# CHECK-NEXT: [0,8] . D===eER . . movdqu %xmm5, %xmm0 +# CHECK-NEXT: [1,0] . D-----R . . pxor %mm0, %mm0 +# CHECK-NEXT: [1,1] . DeeE--R . . movq %mm0, %mm1 +# CHECK-NEXT: [1,2] . D-----R . . xorps %xmm0, %xmm0 +# CHECK-NEXT: [1,3] . DeE---R. . movaps %xmm0, %xmm1 +# CHECK-NEXT: [1,4] . D=eE--R. . movups %xmm1, %xmm2 +# CHECK-NEXT: [1,5] . D==eE-R. . movapd %xmm2, %xmm3 +# CHECK-NEXT: [1,6] . D===eER. . movupd %xmm3, %xmm4 +# CHECK-NEXT: [1,7] . D===eER . movdqa %xmm4, %xmm5 +# CHECK-NEXT: [1,8] . D====eER . movdqu %xmm5, %xmm0 +# CHECK-NEXT: [2,0] . D------R . pxor %mm0, %mm0 +# CHECK-NEXT: [2,1] . D==eeE-R . movq %mm0, %mm1 +# CHECK-NEXT: [2,2] . D-----R . xorps %xmm0, %xmm0 +# CHECK-NEXT: [2,3] . D==eE--R . movaps %xmm0, %xmm1 +# CHECK-NEXT: [2,4] . D===eE-R . movups %xmm1, %xmm2 +# CHECK-NEXT: [2,5] . D====eER . movapd %xmm2, %xmm3 +# CHECK-NEXT: [2,6] . .D====eER . movupd %xmm3, %xmm4 +# CHECK-NEXT: [2,7] . .D=====eER. movdqa %xmm4, %xmm5 +# CHECK-NEXT: [2,8] . .D======eER movdqu %xmm5, %xmm0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -135,12 +135,12 @@ movdqu %xmm5, %xmm0 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 3 0.0 0.0 5.0 pxor %mm0, %mm0 -# CHECK-NEXT: 1. 3 1.3 1.3 2.7 movq %mm0, %mm1 -# CHECK-NEXT: 2. 3 0.0 0.0 5.7 xorps %xmm0, %xmm0 -# CHECK-NEXT: 3. 3 1.7 1.7 3.3 movaps %xmm0, %xmm1 -# CHECK-NEXT: 4. 3 2.3 0.0 2.3 movups %xmm1, %xmm2 -# CHECK-NEXT: 5. 3 3.3 0.0 1.7 movapd %xmm2, %xmm3 -# CHECK-NEXT: 6. 3 4.0 0.0 1.0 movupd %xmm3, %xmm4 -# CHECK-NEXT: 7. 3 4.7 0.0 0.3 movdqa %xmm4, %xmm5 -# CHECK-NEXT: 8. 3 6.3 0.0 0.0 movdqu %xmm5, %xmm0 +# CHECK-NEXT: 0. 3 0.0 0.0 3.7 pxor %mm0, %mm0 +# CHECK-NEXT: 1. 3 1.7 1.7 1.0 movq %mm0, %mm1 +# CHECK-NEXT: 2. 3 0.0 0.0 4.3 xorps %xmm0, %xmm0 +# CHECK-NEXT: 3. 3 1.7 1.7 2.0 movaps %xmm0, %xmm1 +# CHECK-NEXT: 4. 3 2.3 0.0 1.0 movups %xmm1, %xmm2 +# CHECK-NEXT: 5. 3 3.3 0.0 0.3 movapd %xmm2, %xmm3 +# CHECK-NEXT: 6. 3 4.0 0.0 0.0 movupd %xmm3, %xmm4 +# CHECK-NEXT: 7. 3 4.7 0.0 0.0 movdqa %xmm4, %xmm5 +# CHECK-NEXT: 8. 3 5.3 0.0 0.0 movdqu %xmm5, %xmm0 diff --git a/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-3.s b/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-3.s index 14f2ad2133a..b895d2a9349 100644 --- a/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-3.s +++ b/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-3.s @@ -11,12 +11,12 @@ vmovdqu %xmm5, %xmm0 # CHECK: Iterations: 3 # CHECK-NEXT: Instructions: 21 -# CHECK-NEXT: Total Cycles: 17 +# CHECK-NEXT: Total Cycles: 15 # CHECK-NEXT: Total uOps: 21 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.24 -# CHECK-NEXT: IPC: 1.24 +# CHECK-NEXT: uOps Per Cycle: 1.40 +# CHECK-NEXT: IPC: 1.40 # CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Instruction Info: @@ -33,8 +33,8 @@ vmovdqu %xmm5, %xmm0 # CHECK-NEXT: 1 1 0.50 vmovups %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.50 vmovapd %xmm2, %xmm3 # CHECK-NEXT: 1 1 0.50 vmovupd %xmm3, %xmm4 -# CHECK-NEXT: 1 2 0.50 vmovdqa %xmm4, %xmm5 -# CHECK-NEXT: 1 2 0.50 vmovdqu %xmm5, %xmm0 +# CHECK-NEXT: 1 1 0.50 vmovdqa %xmm4, %xmm5 +# CHECK-NEXT: 1 1 0.50 vmovdqu %xmm5, %xmm0 # CHECK: Register File statistics: # CHECK-NEXT: Total number of mappings created: 18 @@ -83,37 +83,37 @@ vmovdqu %xmm5, %xmm0 # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions: # CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vxorps %xmm0, %xmm0, %xmm0 # CHECK-NEXT: - - - - - - - - 0.33 0.67 - - - - 0.67 0.33 - - - - - - - vmovaps %xmm0, %xmm1 -# CHECK-NEXT: - - - - - - - - 1.00 - - - - - 0.67 0.33 - - - - - - - vmovups %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 - - - - - 1.00 - - - - - - - - vmovups %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - - - 0.33 0.67 - - - - - - - vmovapd %xmm2, %xmm3 -# CHECK-NEXT: - - - - - - - - 0.67 0.33 - - - - 0.33 0.67 - - - - - - - vmovupd %xmm3, %xmm4 -# CHECK-NEXT: - - - - - - - - - - - 1.00 - - 0.33 0.67 - - - - - - - vmovdqa %xmm4, %xmm5 -# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.67 0.33 - - - - - - - vmovdqu %xmm5, %xmm0 +# CHECK-NEXT: - - - - - - - - 0.67 0.33 - - - - 0.67 0.33 - - - - - - - vmovupd %xmm3, %xmm4 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - 1.00 - - - - - - - vmovdqa %xmm4, %xmm5 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 0.33 0.67 - - - - - - - vmovdqu %xmm5, %xmm0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456 +# CHECK-NEXT: 01234 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DR . . .. vxorps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [0,1] DeER . . .. vmovaps %xmm0, %xmm1 -# CHECK-NEXT: [0,2] D=eER. . .. vmovups %xmm1, %xmm2 -# CHECK-NEXT: [0,3] D==eER . .. vmovapd %xmm2, %xmm3 -# CHECK-NEXT: [0,4] .D==eER . .. vmovupd %xmm3, %xmm4 -# CHECK-NEXT: [0,5] .D===eeER . .. vmovdqa %xmm4, %xmm5 -# CHECK-NEXT: [0,6] .D=====eeER .. vmovdqu %xmm5, %xmm0 -# CHECK-NEXT: [1,0] .D--------R .. vxorps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [1,1] . DeE-----R .. vmovaps %xmm0, %xmm1 -# CHECK-NEXT: [1,2] . D=eE----R .. vmovups %xmm1, %xmm2 -# CHECK-NEXT: [1,3] . D==eE----R .. vmovapd %xmm2, %xmm3 -# CHECK-NEXT: [1,4] . D===eE---R .. vmovupd %xmm3, %xmm4 -# CHECK-NEXT: [1,5] . D===eeE-R .. vmovdqa %xmm4, %xmm5 -# CHECK-NEXT: [1,6] . D=====eeER .. vmovdqu %xmm5, %xmm0 -# CHECK-NEXT: [2,0] . D--------R .. vxorps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [2,1] . D==eE----R .. vmovaps %xmm0, %xmm1 -# CHECK-NEXT: [2,2] . D===eE--R .. vmovups %xmm1, %xmm2 -# CHECK-NEXT: [2,3] . D====eE--R .. vmovapd %xmm2, %xmm3 -# CHECK-NEXT: [2,4] . D=====eE-R .. vmovupd %xmm3, %xmm4 -# CHECK-NEXT: [2,5] . D======eeER.. vmovdqa %xmm4, %xmm5 -# CHECK-NEXT: [2,6] . D=======eeER vmovdqu %xmm5, %xmm0 +# CHECK: [0,0] DR . . . vxorps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [0,1] DeER . . . vmovaps %xmm0, %xmm1 +# CHECK-NEXT: [0,2] D=eER. . . vmovups %xmm1, %xmm2 +# CHECK-NEXT: [0,3] D==eER . . vmovapd %xmm2, %xmm3 +# CHECK-NEXT: [0,4] .D==eER . . vmovupd %xmm3, %xmm4 +# CHECK-NEXT: [0,5] .D===eER . . vmovdqa %xmm4, %xmm5 +# CHECK-NEXT: [0,6] .D====eER . . vmovdqu %xmm5, %xmm0 +# CHECK-NEXT: [1,0] .D------R . . vxorps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [1,1] . DeE---R . . vmovaps %xmm0, %xmm1 +# CHECK-NEXT: [1,2] . D=eE--R . . vmovups %xmm1, %xmm2 +# CHECK-NEXT: [1,3] . D==eE--R. . vmovapd %xmm2, %xmm3 +# CHECK-NEXT: [1,4] . D===eE-R. . vmovupd %xmm3, %xmm4 +# CHECK-NEXT: [1,5] . D===eER. . vmovdqa %xmm4, %xmm5 +# CHECK-NEXT: [1,6] . D====eER . vmovdqu %xmm5, %xmm0 +# CHECK-NEXT: [2,0] . D------R . vxorps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [2,1] . D===eE-R . vmovaps %xmm0, %xmm1 +# CHECK-NEXT: [2,2] . D===eER . vmovups %xmm1, %xmm2 +# CHECK-NEXT: [2,3] . D====eER . vmovapd %xmm2, %xmm3 +# CHECK-NEXT: [2,4] . D=====eER . vmovupd %xmm3, %xmm4 +# CHECK-NEXT: [2,5] . D======eER. vmovdqa %xmm4, %xmm5 +# CHECK-NEXT: [2,6] . D======eER vmovdqu %xmm5, %xmm0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -122,10 +122,10 @@ vmovdqu %xmm5, %xmm0 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 3 0.0 0.0 5.3 vxorps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: 1. 3 1.7 1.7 3.0 vmovaps %xmm0, %xmm1 -# CHECK-NEXT: 2. 3 2.7 0.3 2.0 vmovups %xmm1, %xmm2 -# CHECK-NEXT: 3. 3 3.7 0.0 2.0 vmovapd %xmm2, %xmm3 -# CHECK-NEXT: 4. 3 4.3 0.0 1.3 vmovupd %xmm3, %xmm4 -# CHECK-NEXT: 5. 3 5.0 0.0 0.3 vmovdqa %xmm4, %xmm5 -# CHECK-NEXT: 6. 3 6.7 0.0 0.0 vmovdqu %xmm5, %xmm0 +# CHECK-NEXT: 0. 3 0.0 0.0 4.0 vxorps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: 1. 3 2.0 2.0 1.3 vmovaps %xmm0, %xmm1 +# CHECK-NEXT: 2. 3 2.7 0.0 0.7 vmovups %xmm1, %xmm2 +# CHECK-NEXT: 3. 3 3.7 0.0 0.7 vmovapd %xmm2, %xmm3 +# CHECK-NEXT: 4. 3 4.3 0.0 0.3 vmovupd %xmm3, %xmm4 +# CHECK-NEXT: 5. 3 5.0 0.0 0.0 vmovdqa %xmm4, %xmm5 +# CHECK-NEXT: 6. 3 5.7 0.0 0.0 vmovdqu %xmm5, %xmm0 diff --git a/test/tools/llvm-mca/X86/BdVer2/resources-avx1.s b/test/tools/llvm-mca/X86/BdVer2/resources-avx1.s index 917ceff1b6c..498b3af80c7 100644 --- a/test/tools/llvm-mca/X86/BdVer2/resources-avx1.s +++ b/test/tools/llvm-mca/X86/BdVer2/resources-avx1.s @@ -1188,13 +1188,13 @@ vzeroupper # CHECK-NEXT: 1 14 9.50 * vdivss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 15 15 1.50 vdppd $22, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 17 20 1.50 * vdppd $22, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 17 25 1.50 vdpps $22, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 17 27 1.50 vdpps $22, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 18 30 1.50 * vdpps $22, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 25 27 3.00 vdpps $22, %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 29 32 3.00 * vdpps $22, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 2 0.50 vextractf128 $1, %ymm0, %xmm2 # CHECK-NEXT: 2 7 0.50 * vextractf128 $1, %ymm0, (%rax) -# CHECK-NEXT: 2 13 1.00 vextractps $1, %xmm0, %ecx +# CHECK-NEXT: 2 12 1.00 vextractps $1, %xmm0, %ecx # CHECK-NEXT: 2 13 1.00 * vextractps $1, %xmm0, (%rax) # CHECK-NEXT: 3 11 1.00 vhaddpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 4 16 1.00 * vhaddpd (%rax), %xmm1, %xmm2 @@ -1264,21 +1264,21 @@ vzeroupper # CHECK-NEXT: 2 2 1.00 vmovaps %ymm0, %ymm2 # CHECK-NEXT: 4 1 1.00 * vmovaps %ymm0, (%rax) # CHECK-NEXT: 2 5 0.50 * vmovaps (%rax), %ymm2 -# CHECK-NEXT: 2 10 0.50 vmovd %eax, %xmm2 +# CHECK-NEXT: 2 11 0.50 vmovd %eax, %xmm2 # CHECK-NEXT: 1 5 0.50 * vmovd (%rax), %xmm2 -# CHECK-NEXT: 1 10 1.00 vmovd %xmm0, %ecx +# CHECK-NEXT: 1 11 1.00 vmovd %xmm0, %ecx # CHECK-NEXT: 1 2 1.00 * vmovd %xmm0, (%rax) # CHECK-NEXT: 1 2 0.50 vmovddup %xmm0, %xmm2 # CHECK-NEXT: 1 7 0.50 * vmovddup (%rax), %xmm2 # CHECK-NEXT: 2 2 1.00 vmovddup %ymm0, %ymm2 # CHECK-NEXT: 2 7 1.00 * vmovddup (%rax), %ymm2 -# CHECK-NEXT: 1 2 0.50 vmovdqa %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 vmovdqa %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 * vmovdqa %xmm0, (%rax) # CHECK-NEXT: 1 5 0.50 * vmovdqa (%rax), %xmm2 # CHECK-NEXT: 2 2 1.00 vmovdqa %ymm0, %ymm2 # CHECK-NEXT: 4 1 1.00 * vmovdqa %ymm0, (%rax) # CHECK-NEXT: 2 5 0.50 * vmovdqa (%rax), %ymm2 -# CHECK-NEXT: 1 2 0.50 vmovdqu %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 vmovdqu %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 * vmovdqu %xmm0, (%rax) # CHECK-NEXT: 1 5 0.50 * vmovdqu (%rax), %xmm2 # CHECK-NEXT: 2 2 1.00 vmovdqu %ymm0, %ymm2 @@ -1294,10 +1294,10 @@ vzeroupper # CHECK-NEXT: 1 7 0.50 * vmovlpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 2 1.00 * vmovlps %xmm0, (%rax) # CHECK-NEXT: 1 7 0.50 * vmovlps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 2 10 1.00 vmovmskpd %xmm0, %ecx -# CHECK-NEXT: 2 10 1.00 vmovmskpd %ymm0, %ecx -# CHECK-NEXT: 2 10 1.00 vmovmskps %xmm0, %ecx -# CHECK-NEXT: 2 10 1.00 vmovmskps %ymm0, %ecx +# CHECK-NEXT: 2 12 1.00 vmovmskpd %xmm0, %ecx +# CHECK-NEXT: 2 12 1.00 vmovmskpd %ymm0, %ecx +# CHECK-NEXT: 2 12 1.00 vmovmskps %xmm0, %ecx +# CHECK-NEXT: 2 12 1.00 vmovmskps %ymm0, %ecx # CHECK-NEXT: 1 2 1.00 * vmovntdq %xmm0, (%rax) # CHECK-NEXT: 4 2 2.00 * vmovntdq %ymm0, (%rax) # CHECK-NEXT: 1 5 0.50 * vmovntdqa (%rax), %xmm2 @@ -1307,9 +1307,9 @@ vzeroupper # CHECK-NEXT: 1 3 1.00 * vmovntps %xmm0, (%rax) # CHECK-NEXT: 4 3 2.00 * vmovntps %ymm0, (%rax) # CHECK-NEXT: 1 2 0.50 vmovq %xmm0, %xmm2 -# CHECK-NEXT: 2 10 0.50 vmovq %rax, %xmm2 +# CHECK-NEXT: 2 11 0.50 vmovq %rax, %xmm2 # CHECK-NEXT: 1 5 0.50 * vmovq (%rax), %xmm2 -# CHECK-NEXT: 1 10 1.00 vmovq %xmm0, %rcx +# CHECK-NEXT: 1 11 1.00 vmovq %xmm0, %rcx # CHECK-NEXT: 1 2 1.00 * vmovq %xmm0, (%rax) # CHECK-NEXT: 1 2 0.50 vmovsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 2 1.00 * vmovsd %xmm0, (%rax) @@ -1337,7 +1337,7 @@ vzeroupper # CHECK-NEXT: 2 2 1.00 vmovups %ymm0, %ymm2 # CHECK-NEXT: 8 1 1.00 * vmovups %ymm0, (%rax) # CHECK-NEXT: 2 5 0.50 * vmovups (%rax), %ymm2 -# CHECK-NEXT: 9 9 2.00 vmpsadbw $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 9 8 2.00 vmpsadbw $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 9 14 2.00 * vmpsadbw $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 5 1.00 vmulpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 10 1.00 * vmulpd (%rax), %xmm1, %xmm2 @@ -1403,7 +1403,7 @@ vzeroupper # CHECK-NEXT: 1 7 2.00 * vpblendvb %xmm3, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 2 0.50 vpblendw $11, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 7 0.50 * vpblendw $11, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 6 13 1.00 vpclmulqdq $11, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 6 12 1.00 vpclmulqdq $11, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 6 17 1.00 * vpclmulqdq $11, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 2 0.50 vpcmpeqb %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 7 0.50 * vpcmpeqb (%rax), %xmm1, %xmm2 @@ -1413,8 +1413,8 @@ vzeroupper # CHECK-NEXT: 1 7 0.50 * vpcmpeqq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 2 0.50 vpcmpeqw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 7 0.50 * vpcmpeqw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 27 15 4.00 vpcmpestri $1, %xmm0, %xmm2 -# CHECK-NEXT: 28 20 4.50 * vpcmpestri $1, (%rax), %xmm2 +# CHECK-NEXT: 27 14 4.00 vpcmpestri $1, %xmm0, %xmm2 +# CHECK-NEXT: 28 19 4.50 * vpcmpestri $1, (%rax), %xmm2 # CHECK-NEXT: 27 10 4.00 vpcmpestrm $1, %xmm0, %xmm2 # CHECK-NEXT: 28 15 4.50 * vpcmpestrm $1, (%rax), %xmm2 # CHECK-NEXT: 1 2 0.50 vpcmpgtb %xmm0, %xmm1, %xmm2 @@ -1425,10 +1425,10 @@ vzeroupper # CHECK-NEXT: 1 7 0.50 * vpcmpgtq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 2 0.50 vpcmpgtw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 7 0.50 * vpcmpgtw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 7 14 1.00 vpcmpistri $1, %xmm0, %xmm2 -# CHECK-NEXT: 8 19 1.00 * vpcmpistri $1, (%rax), %xmm2 -# CHECK-NEXT: 7 6 1.00 vpcmpistrm $1, %xmm0, %xmm2 -# CHECK-NEXT: 9 11 1.00 * vpcmpistrm $1, (%rax), %xmm2 +# CHECK-NEXT: 7 11 1.00 vpcmpistri $1, %xmm0, %xmm2 +# CHECK-NEXT: 8 16 1.00 * vpcmpistri $1, (%rax), %xmm2 +# CHECK-NEXT: 7 7 1.00 vpcmpistrm $1, %xmm0, %xmm2 +# CHECK-NEXT: 9 12 1.00 * vpcmpistrm $1, (%rax), %xmm2 # CHECK-NEXT: 8 4 0.50 vperm2f128 $1, %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 10 8 0.50 * vperm2f128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 2 0.50 vpermilpd $1, %xmm0, %xmm2 @@ -1447,13 +1447,13 @@ vzeroupper # CHECK-NEXT: 2 7 1.00 * vpermilps $1, (%rax), %ymm2 # CHECK-NEXT: 2 3 3.00 vpermilps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 8 3.00 * vpermilps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 2 13 1.00 vpextrb $1, %xmm0, %ecx +# CHECK-NEXT: 2 12 1.00 vpextrb $1, %xmm0, %ecx # CHECK-NEXT: 2 13 1.00 * vpextrb $1, %xmm0, (%rax) -# CHECK-NEXT: 2 13 1.00 vpextrd $1, %xmm0, %ecx +# CHECK-NEXT: 2 12 1.00 vpextrd $1, %xmm0, %ecx # CHECK-NEXT: 2 13 1.00 * vpextrd $1, %xmm0, (%rax) -# CHECK-NEXT: 2 13 1.00 vpextrq $1, %xmm0, %rcx +# CHECK-NEXT: 2 12 1.00 vpextrq $1, %xmm0, %rcx # CHECK-NEXT: 2 13 1.00 * vpextrq $1, %xmm0, (%rax) -# CHECK-NEXT: 2 13 1.00 vpextrw $1, %xmm0, %ecx +# CHECK-NEXT: 2 12 1.00 vpextrw $1, %xmm0, %ecx # CHECK-NEXT: 2 13 1.00 * vpextrw $1, %xmm0, (%rax) # CHECK-NEXT: 3 5 0.50 vphaddd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 4 10 0.50 * vphaddd (%rax), %xmm1, %xmm2 @@ -1505,7 +1505,7 @@ vzeroupper # CHECK-NEXT: 1 7 0.50 * vpminud (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 2 0.50 vpminuw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 7 0.50 * vpminuw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 2 13 1.00 vpmovmskb %xmm0, %ecx +# CHECK-NEXT: 2 12 1.00 vpmovmskb %xmm0, %ecx # CHECK-NEXT: 1 2 0.50 vpmovsxbd %xmm0, %xmm2 # CHECK-NEXT: 1 7 0.50 * vpmovsxbd (%rax), %xmm2 # CHECK-NEXT: 1 2 0.50 vpmovsxbq %xmm0, %xmm2 diff --git a/test/tools/llvm-mca/X86/BdVer2/resources-mmx.s b/test/tools/llvm-mca/X86/BdVer2/resources-mmx.s index 66dfc084c27..66a5cd96347 100644 --- a/test/tools/llvm-mca/X86/BdVer2/resources-mmx.s +++ b/test/tools/llvm-mca/X86/BdVer2/resources-mmx.s @@ -165,13 +165,13 @@ pxor (%rax), %mm2 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 2 0.50 * * U emms -# CHECK-NEXT: 2 10 0.50 movd %eax, %mm2 +# CHECK-NEXT: 2 11 0.50 movd %eax, %mm2 # CHECK-NEXT: 1 5 0.50 * movd (%rax), %mm2 -# CHECK-NEXT: 1 10 1.00 movd %mm0, %ecx +# CHECK-NEXT: 1 11 1.00 movd %mm0, %ecx # CHECK-NEXT: 1 2 1.00 * U movd %mm0, (%rax) -# CHECK-NEXT: 2 10 0.50 movq %rax, %mm2 +# CHECK-NEXT: 2 11 0.50 movq %rax, %mm2 # CHECK-NEXT: 1 5 0.50 * movq (%rax), %mm2 -# CHECK-NEXT: 1 10 1.00 movq %mm0, %rcx +# CHECK-NEXT: 1 11 1.00 movq %mm0, %rcx # CHECK-NEXT: 1 2 1.00 * movq %mm0, (%rax) # CHECK-NEXT: 1 2 0.50 packsswb %mm0, %mm2 # CHECK-NEXT: 1 7 0.50 * packsswb (%rax), %mm2 diff --git a/test/tools/llvm-mca/X86/BdVer2/resources-sse1.s b/test/tools/llvm-mca/X86/BdVer2/resources-sse1.s index 4bfb1cc7057..81b2c46441a 100644 --- a/test/tools/llvm-mca/X86/BdVer2/resources-sse1.s +++ b/test/tools/llvm-mca/X86/BdVer2/resources-sse1.s @@ -212,7 +212,7 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 9 1.00 * cvtpi2ps (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 cvtps2pi %xmm0, %mm2 # CHECK-NEXT: 1 9 1.00 * cvtps2pi (%rax), %mm2 -# CHECK-NEXT: 2 14 1.00 cvtsi2ssl %ecx, %xmm2 +# CHECK-NEXT: 2 13 1.00 cvtsi2ssl %ecx, %xmm2 # CHECK-NEXT: 2 13 1.00 cvtsi2ssq %rcx, %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2ssl (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2ssl (%rax), %xmm2 @@ -249,7 +249,7 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 7 0.50 * movhps (%rax), %xmm2 # CHECK-NEXT: 1 2 1.00 * movlps %xmm0, (%rax) # CHECK-NEXT: 1 7 0.50 * movlps (%rax), %xmm2 -# CHECK-NEXT: 2 10 1.00 movmskps %xmm0, %ecx +# CHECK-NEXT: 2 12 1.00 movmskps %xmm0, %ecx # CHECK-NEXT: 1 3 1.00 * movntps %xmm0, (%rax) # CHECK-NEXT: 1 2 1.00 * * U movntq %mm0, (%rax) # CHECK-NEXT: 1 2 0.50 movss %xmm0, %xmm2 @@ -268,7 +268,7 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 7 0.50 * pavgb (%rax), %mm2 # CHECK-NEXT: 1 2 0.50 pavgw %mm0, %mm2 # CHECK-NEXT: 1 7 0.50 * pavgw (%rax), %mm2 -# CHECK-NEXT: 2 13 1.00 pextrw $1, %mm0, %ecx +# CHECK-NEXT: 2 12 1.00 pextrw $1, %mm0, %ecx # CHECK-NEXT: 2 12 0.50 pinsrw $1, %eax, %mm2 # CHECK-NEXT: 2 6 0.50 * pinsrw $1, (%rax), %mm2 # CHECK-NEXT: 1 2 0.50 pmaxsw %mm0, %mm2 @@ -279,7 +279,7 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 7 0.50 * pminsw (%rax), %mm2 # CHECK-NEXT: 1 2 0.50 pminub %mm0, %mm2 # CHECK-NEXT: 1 7 0.50 * pminub (%rax), %mm2 -# CHECK-NEXT: 2 13 1.00 pmovmskb %xmm0, %ecx +# CHECK-NEXT: 2 12 1.00 pmovmskb %xmm0, %ecx # CHECK-NEXT: 1 4 1.00 pmulhuw %mm0, %mm2 # CHECK-NEXT: 1 9 1.00 * pmulhuw (%rax), %mm2 # CHECK-NEXT: 1 5 0.50 * * prefetcht0 (%rax) diff --git a/test/tools/llvm-mca/X86/BdVer2/resources-sse2.s b/test/tools/llvm-mca/X86/BdVer2/resources-sse2.s index 34bea00ff07..3e9051032bd 100644 --- a/test/tools/llvm-mca/X86/BdVer2/resources-sse2.s +++ b/test/tools/llvm-mca/X86/BdVer2/resources-sse2.s @@ -444,7 +444,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 18 1.00 * cvtsd2si (%rax), %rcx # CHECK-NEXT: 1 4 1.00 cvtsd2ss %xmm0, %xmm2 # CHECK-NEXT: 1 9 1.00 * cvtsd2ss (%rax), %xmm2 -# CHECK-NEXT: 2 14 1.00 cvtsi2sdl %ecx, %xmm2 +# CHECK-NEXT: 2 13 1.00 cvtsi2sdl %ecx, %xmm2 # CHECK-NEXT: 2 13 1.00 cvtsi2sdq %rcx, %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2sdl (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2sdl (%rax), %xmm2 @@ -478,32 +478,32 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 movapd %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 * movapd %xmm0, (%rax) # CHECK-NEXT: 1 5 0.50 * movapd (%rax), %xmm2 -# CHECK-NEXT: 2 10 0.50 movd %eax, %xmm2 +# CHECK-NEXT: 2 11 0.50 movd %eax, %xmm2 # CHECK-NEXT: 1 5 0.50 * movd (%rax), %xmm2 -# CHECK-NEXT: 1 10 1.00 movd %xmm0, %ecx +# CHECK-NEXT: 1 11 1.00 movd %xmm0, %ecx # CHECK-NEXT: 1 2 1.00 * movd %xmm0, (%rax) -# CHECK-NEXT: 1 2 0.50 movdqa %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 movdqa %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 * movdqa %xmm0, (%rax) # CHECK-NEXT: 1 5 0.50 * movdqa (%rax), %xmm2 -# CHECK-NEXT: 1 2 0.50 movdqu %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 movdqu %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 * movdqu %xmm0, (%rax) # CHECK-NEXT: 1 5 0.50 * movdqu (%rax), %xmm2 -# CHECK-NEXT: 1 2 0.50 movdq2q %xmm0, %mm2 +# CHECK-NEXT: 1 1 0.50 movdq2q %xmm0, %mm2 # CHECK-NEXT: 2 2 1.00 * movhpd %xmm0, (%rax) # CHECK-NEXT: 1 7 0.50 * movhpd (%rax), %xmm2 # CHECK-NEXT: 1 2 1.00 * movlpd %xmm0, (%rax) # CHECK-NEXT: 1 7 0.50 * movlpd (%rax), %xmm2 -# CHECK-NEXT: 2 10 1.00 movmskpd %xmm0, %ecx +# CHECK-NEXT: 2 12 1.00 movmskpd %xmm0, %ecx # CHECK-NEXT: 1 1 1.00 * movntil %eax, (%rax) # CHECK-NEXT: 1 1 1.00 * movntiq %rax, (%rax) # CHECK-NEXT: 1 2 1.00 * movntdq %xmm0, (%rax) # CHECK-NEXT: 1 3 1.00 * movntpd %xmm0, (%rax) # CHECK-NEXT: 1 2 0.50 movq %xmm0, %xmm2 -# CHECK-NEXT: 2 10 0.50 movq %rax, %xmm2 +# CHECK-NEXT: 2 11 0.50 movq %rax, %xmm2 # CHECK-NEXT: 1 5 0.50 * movq (%rax), %xmm2 -# CHECK-NEXT: 1 10 1.00 movq %xmm0, %rcx +# CHECK-NEXT: 1 11 1.00 movq %xmm0, %rcx # CHECK-NEXT: 1 2 1.00 * movq %xmm0, (%rax) -# CHECK-NEXT: 1 2 0.50 movq2dq %mm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 movq2dq %mm0, %xmm2 # CHECK-NEXT: 1 2 0.50 movsd %xmm0, %xmm2 # CHECK-NEXT: 1 2 1.00 * movsd %xmm0, (%rax) # CHECK-NEXT: 1 5 0.50 * movsd (%rax), %xmm2 @@ -560,7 +560,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 7 0.50 * pcmpgtd (%rax), %xmm2 # CHECK-NEXT: 1 2 0.50 pcmpgtw %xmm0, %xmm2 # CHECK-NEXT: 1 7 0.50 * pcmpgtw (%rax), %xmm2 -# CHECK-NEXT: 2 13 1.00 pextrw $1, %xmm0, %ecx +# CHECK-NEXT: 2 12 1.00 pextrw $1, %xmm0, %ecx # CHECK-NEXT: 2 12 0.50 pinsrw $1, %eax, %xmm0 # CHECK-NEXT: 2 6 0.50 * pinsrw $1, (%rax), %xmm0 # CHECK-NEXT: 1 4 1.00 pmaddwd %xmm0, %xmm2 @@ -573,7 +573,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 7 0.50 * pminsw (%rax), %xmm2 # CHECK-NEXT: 1 2 0.50 pminub %xmm0, %xmm2 # CHECK-NEXT: 1 7 0.50 * pminub (%rax), %xmm2 -# CHECK-NEXT: 2 13 1.00 pmovmskb %xmm0, %ecx +# CHECK-NEXT: 2 12 1.00 pmovmskb %xmm0, %ecx # CHECK-NEXT: 1 4 1.00 pmulhuw %xmm0, %xmm2 # CHECK-NEXT: 1 9 1.00 * pmulhuw (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 pmulhw %xmm0, %xmm2 diff --git a/test/tools/llvm-mca/X86/BdVer2/resources-sse41.s b/test/tools/llvm-mca/X86/BdVer2/resources-sse41.s index 1e919a58d1c..6eb2bd98f3e 100644 --- a/test/tools/llvm-mca/X86/BdVer2/resources-sse41.s +++ b/test/tools/llvm-mca/X86/BdVer2/resources-sse41.s @@ -167,7 +167,7 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 17 20 1.50 * dppd $22, (%rax), %xmm2 # CHECK-NEXT: 16 25 1.50 dpps $22, %xmm0, %xmm2 # CHECK-NEXT: 18 30 1.50 * dpps $22, (%rax), %xmm2 -# CHECK-NEXT: 2 13 1.00 extractps $1, %xmm0, %ecx +# CHECK-NEXT: 2 12 1.00 extractps $1, %xmm0, %ecx # CHECK-NEXT: 2 13 1.00 * extractps $1, %xmm0, (%rax) # CHECK-NEXT: 1 2 0.50 insertps $1, %xmm0, %xmm2 # CHECK-NEXT: 1 7 0.50 * insertps $1, (%rax), %xmm2 @@ -182,11 +182,11 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 1 7 0.50 * pblendw $11, (%rax), %xmm2 # CHECK-NEXT: 1 2 0.50 pcmpeqq %xmm0, %xmm2 # CHECK-NEXT: 1 7 0.50 * pcmpeqq (%rax), %xmm2 -# CHECK-NEXT: 2 13 1.00 pextrb $1, %xmm0, %ecx +# CHECK-NEXT: 2 12 1.00 pextrb $1, %xmm0, %ecx # CHECK-NEXT: 2 13 1.00 * pextrb $1, %xmm0, (%rax) -# CHECK-NEXT: 2 13 1.00 pextrd $1, %xmm0, %ecx +# CHECK-NEXT: 2 12 1.00 pextrd $1, %xmm0, %ecx # CHECK-NEXT: 2 13 1.00 * pextrd $1, %xmm0, (%rax) -# CHECK-NEXT: 2 13 1.00 pextrq $1, %xmm0, %rcx +# CHECK-NEXT: 2 12 1.00 pextrq $1, %xmm0, %rcx # CHECK-NEXT: 2 13 1.00 * pextrq $1, %xmm0, (%rax) # CHECK-NEXT: 2 13 1.00 * pextrw $1, %xmm0, (%rax) # CHECK-NEXT: 2 4 1.00 phminposuw %xmm0, %xmm2 diff --git a/test/tools/llvm-mca/X86/BdVer2/resources-sse42.s b/test/tools/llvm-mca/X86/BdVer2/resources-sse42.s index 433511f7ee5..5d2af199dcb 100644 --- a/test/tools/llvm-mca/X86/BdVer2/resources-sse42.s +++ b/test/tools/llvm-mca/X86/BdVer2/resources-sse42.s @@ -40,24 +40,24 @@ pcmpgtq (%rax), %xmm2 # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 3 3 2.00 crc32b %al, %ecx -# CHECK-NEXT: 3 7 2.00 * crc32b (%rax), %ecx +# CHECK-NEXT: 3 2 2.00 crc32b %al, %ecx +# CHECK-NEXT: 3 6 2.00 * crc32b (%rax), %ecx # CHECK-NEXT: 7 6 2.00 crc32l %eax, %ecx -# CHECK-NEXT: 3 7 2.00 * crc32l (%rax), %ecx +# CHECK-NEXT: 3 6 2.00 * crc32l (%rax), %ecx # CHECK-NEXT: 5 5 2.00 crc32w %ax, %ecx -# CHECK-NEXT: 3 7 2.00 * crc32w (%rax), %ecx -# CHECK-NEXT: 3 3 2.00 crc32b %al, %rcx -# CHECK-NEXT: 3 7 2.00 * crc32b (%rax), %rcx +# CHECK-NEXT: 3 6 2.00 * crc32w (%rax), %ecx +# CHECK-NEXT: 3 2 2.00 crc32b %al, %rcx +# CHECK-NEXT: 3 6 2.00 * crc32b (%rax), %rcx # CHECK-NEXT: 11 10 2.00 crc32q %rax, %rcx -# CHECK-NEXT: 3 7 2.00 * crc32q (%rax), %rcx -# CHECK-NEXT: 27 15 4.00 pcmpestri $1, %xmm0, %xmm2 -# CHECK-NEXT: 28 20 4.50 * pcmpestri $1, (%rax), %xmm2 +# CHECK-NEXT: 3 6 2.00 * crc32q (%rax), %rcx +# CHECK-NEXT: 27 14 4.00 pcmpestri $1, %xmm0, %xmm2 +# CHECK-NEXT: 28 19 4.50 * pcmpestri $1, (%rax), %xmm2 # CHECK-NEXT: 27 10 4.00 pcmpestrm $1, %xmm0, %xmm2 # CHECK-NEXT: 28 15 4.50 * pcmpestrm $1, (%rax), %xmm2 -# CHECK-NEXT: 7 14 1.00 pcmpistri $1, %xmm0, %xmm2 -# CHECK-NEXT: 8 19 1.00 * pcmpistri $1, (%rax), %xmm2 -# CHECK-NEXT: 7 6 1.00 pcmpistrm $1, %xmm0, %xmm2 -# CHECK-NEXT: 9 11 1.00 * pcmpistrm $1, (%rax), %xmm2 +# CHECK-NEXT: 7 11 1.00 pcmpistri $1, %xmm0, %xmm2 +# CHECK-NEXT: 8 16 1.00 * pcmpistri $1, (%rax), %xmm2 +# CHECK-NEXT: 7 7 1.00 pcmpistrm $1, %xmm0, %xmm2 +# CHECK-NEXT: 9 12 1.00 * pcmpistrm $1, (%rax), %xmm2 # CHECK-NEXT: 1 2 0.50 pcmpgtq %xmm0, %xmm2 # CHECK-NEXT: 1 7 0.50 * pcmpgtq (%rax), %xmm2 diff --git a/test/tools/llvm-mca/X86/BdVer2/resources-x86_64.s b/test/tools/llvm-mca/X86/BdVer2/resources-x86_64.s index 55df4018278..f8495db6973 100644 --- a/test/tools/llvm-mca/X86/BdVer2/resources-x86_64.s +++ b/test/tools/llvm-mca/X86/BdVer2/resources-x86_64.s @@ -1333,7 +1333,7 @@ xorq (%rax), %rdi # CHECK-NEXT: 15 7 0.50 rcrq $7, %rdi # CHECK-NEXT: 2 5 1.00 * rclq $7, (%rax) # CHECK-NEXT: 2 5 1.00 * rcrq $7, (%rax) -# CHECK-NEXT: 17 7 0.50 rclq %cl, %rdi +# CHECK-NEXT: 17 8 0.50 rclq %cl, %rdi # CHECK-NEXT: 16 7 0.50 rcrq %cl, %rdi # CHECK-NEXT: 2 5 1.00 * rclq %cl, (%rax) # CHECK-NEXT: 2 5 1.00 * rcrq %cl, (%rax) @@ -1540,28 +1540,28 @@ xorq (%rax), %rdi # CHECK-NEXT: 2 1 1.00 * setg (%rax) # CHECK-NEXT: 1 1 0.50 setle %al # CHECK-NEXT: 2 1 1.00 * setle (%rax) -# CHECK-NEXT: 7 4 4.00 shldw %cl, %si, %di -# CHECK-NEXT: 7 4 4.00 shrdw %cl, %si, %di +# CHECK-NEXT: 7 3 4.00 shldw %cl, %si, %di +# CHECK-NEXT: 7 3 4.00 shrdw %cl, %si, %di # CHECK-NEXT: 8 4 11.00 * * shldw %cl, %si, (%rax) # CHECK-NEXT: 8 4 11.00 * * shrdw %cl, %si, (%rax) -# CHECK-NEXT: 6 4 3.00 shldw $7, %si, %di +# CHECK-NEXT: 6 3 3.00 shldw $7, %si, %di # CHECK-NEXT: 6 3 3.00 shrdw $7, %si, %di # CHECK-NEXT: 8 4 11.00 * * shldw $7, %si, (%rax) # CHECK-NEXT: 8 4 11.00 * * shrdw $7, %si, (%rax) -# CHECK-NEXT: 7 4 4.00 shldl %cl, %esi, %edi -# CHECK-NEXT: 7 4 4.00 shrdl %cl, %esi, %edi +# CHECK-NEXT: 7 3 4.00 shldl %cl, %esi, %edi +# CHECK-NEXT: 7 3 4.00 shrdl %cl, %esi, %edi # CHECK-NEXT: 8 4 11.00 * * shldl %cl, %esi, (%rax) # CHECK-NEXT: 8 4 11.00 * * shrdl %cl, %esi, (%rax) # CHECK-NEXT: 6 3 3.00 shldl $7, %esi, %edi -# CHECK-NEXT: 6 4 3.00 shrdl $7, %esi, %edi +# CHECK-NEXT: 6 3 3.00 shrdl $7, %esi, %edi # CHECK-NEXT: 8 4 11.00 * * shldl $7, %esi, (%rax) # CHECK-NEXT: 8 4 11.00 * * shrdl $7, %esi, (%rax) -# CHECK-NEXT: 7 4 4.00 shldq %cl, %rsi, %rdi -# CHECK-NEXT: 7 4 4.00 shrdq %cl, %rsi, %rdi +# CHECK-NEXT: 7 3 4.00 shldq %cl, %rsi, %rdi +# CHECK-NEXT: 7 3 4.00 shrdq %cl, %rsi, %rdi # CHECK-NEXT: 8 4 11.00 * * shldq %cl, %rsi, (%rax) # CHECK-NEXT: 8 4 11.00 * * shrdq %cl, %rsi, (%rax) -# CHECK-NEXT: 6 4 3.00 shldq $7, %rsi, %rdi -# CHECK-NEXT: 6 4 3.00 shrdq $7, %rsi, %rdi +# CHECK-NEXT: 6 3 3.00 shldq $7, %rsi, %rdi +# CHECK-NEXT: 6 3 3.00 shrdq $7, %rsi, %rdi # CHECK-NEXT: 8 4 11.00 * * shldq $7, %rsi, (%rax) # CHECK-NEXT: 8 4 11.00 * * shrdq $7, %rsi, (%rax) # CHECK-NEXT: 1 1 0.50 U stc @@ -1628,18 +1628,18 @@ xorq (%rax), %rdi # CHECK-NEXT: 1 5 0.50 * testq %rsi, (%rax) # CHECK-NEXT: 1 100 0.50 * U ud2 # CHECK-NEXT: 1 100 0.50 U wrmsr -# CHECK-NEXT: 4 2 1.00 xaddb %bl, %cl +# CHECK-NEXT: 4 1 1.00 xaddb %bl, %cl # CHECK-NEXT: 4 6 1.00 * * xaddb %bl, (%rcx) -# CHECK-NEXT: 4 2 1.00 xaddw %bx, %cx +# CHECK-NEXT: 4 1 1.00 xaddw %bx, %cx # CHECK-NEXT: 4 6 1.00 * * xaddw %ax, (%rbx) -# CHECK-NEXT: 4 2 1.00 xaddl %ebx, %ecx +# CHECK-NEXT: 4 1 1.00 xaddl %ebx, %ecx # CHECK-NEXT: 4 6 1.00 * * xaddl %eax, (%rbx) -# CHECK-NEXT: 4 2 1.00 xaddq %rbx, %rcx +# CHECK-NEXT: 4 1 1.00 xaddq %rbx, %rcx # CHECK-NEXT: 4 6 1.00 * * xaddq %rax, (%rbx) # CHECK-NEXT: 2 1 1.00 xchgb %bl, %cl # CHECK-NEXT: 2 5 1.00 * * xchgb %bl, (%rbx) # CHECK-NEXT: 2 1 1.00 xchgw %bx, %ax -# CHECK-NEXT: 2 2 1.00 xchgw %bx, %cx +# CHECK-NEXT: 2 1 1.00 xchgw %bx, %cx # CHECK-NEXT: 2 5 1.00 * * xchgw %ax, (%rbx) # CHECK-NEXT: 2 1 1.00 xchgl %ebx, %eax # CHECK-NEXT: 2 1 1.00 xchgl %ebx, %ecx diff --git a/test/tools/llvm-mca/X86/BdVer2/resources-xop.s b/test/tools/llvm-mca/X86/BdVer2/resources-xop.s index fabd125307e..980b5e5ab71 100644 --- a/test/tools/llvm-mca/X86/BdVer2/resources-xop.s +++ b/test/tools/llvm-mca/X86/BdVer2/resources-xop.s @@ -321,7 +321,7 @@ vpshlw %xmm0, (%rax), %xmm3 # CHECK-NEXT: 1 9 1.00 * vpmadcsswd %xmm0, (%rax), %xmm1, %xmm3 # CHECK-NEXT: 1 4 1.00 vpmadcswd %xmm0, %xmm1, %xmm2, %xmm3 # CHECK-NEXT: 1 9 1.00 * vpmadcswd %xmm0, (%rax), %xmm1, %xmm3 -# CHECK-NEXT: 1 3 2.00 vpperm %xmm0, %xmm1, %xmm2, %xmm3 +# CHECK-NEXT: 1 2 2.00 vpperm %xmm0, %xmm1, %xmm2, %xmm3 # CHECK-NEXT: 1 8 2.00 * vpperm (%rax), %xmm0, %xmm1, %xmm3 # CHECK-NEXT: 1 8 2.00 * vpperm %xmm0, (%rax), %xmm1, %xmm3 # CHECK-NEXT: 1 3 0.50 vprotb %xmm0, %xmm1, %xmm3 -- 2.40.0