From: Andrea Di Biagio Date: Fri, 13 Jul 2018 16:46:51 +0000 (+0000) Subject: [llvm-mca][BtVer2] Add tests for dependency breaking instructions. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b6e0532b347a44ed9c12f48a5d384c2f7e3ec995;p=llvm [llvm-mca][BtVer2] Add tests for dependency breaking instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@337024 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s b/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s new file mode 100644 index 00000000000..bc5ceb5b72e --- /dev/null +++ b/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s @@ -0,0 +1,74 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s + +# Perf stat reports an IPC of 1.97 for this block of code. + +# The CMP instruction doesn't depend on the value of EAX. It can set the flags +# without having to read the inputs. + +cmp %eax, %eax +cmovae %ebx, %eax + +# CHECK: Iterations: 1500 +# CHECK-NEXT: Instructions: 3000 +# CHECK-NEXT: Total Cycles: 3003 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 1.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.50 cmpl %eax, %eax +# CHECK-NEXT: 1 1 0.50 cmovael %ebx, %eax + +# CHECK: Resources: +# CHECK-NEXT: [0] - JALU0 +# CHECK-NEXT: [1] - JALU1 +# CHECK-NEXT: [2] - JDiv +# CHECK-NEXT: [3] - JFPA +# CHECK-NEXT: [4] - JFPM +# CHECK-NEXT: [5] - JFPU0 +# CHECK-NEXT: [6] - JFPU1 +# CHECK-NEXT: [7] - JLAGU +# CHECK-NEXT: [8] - JMul +# CHECK-NEXT: [9] - JSAGU +# CHECK-NEXT: [10] - JSTC +# CHECK-NEXT: [11] - JVALU0 +# CHECK-NEXT: [12] - JVALU1 +# CHECK-NEXT: [13] - JVIMUL + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - cmpl %eax, %eax +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - cmovael %ebx, %eax + +# CHECK: Timeline view: +# CHECK-NEXT: Index 012345678 + +# CHECK: [0,0] DeER . . cmpl %eax, %eax +# CHECK-NEXT: [0,1] D=eER. . cmovael %ebx, %eax +# CHECK-NEXT: [1,0] .D=eER . cmpl %eax, %eax +# CHECK-NEXT: [1,1] .D==eER . cmovael %ebx, %eax +# CHECK-NEXT: [2,0] . D==eER. cmpl %eax, %eax +# CHECK-NEXT: [2,1] . D===eER cmovael %ebx, %eax + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 3 2.0 0.3 0.0 cmpl %eax, %eax +# CHECK-NEXT: 1. 3 3.0 0.0 0.0 cmovael %ebx, %eax diff --git a/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s b/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s new file mode 100644 index 00000000000..97bf501e577 --- /dev/null +++ b/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s @@ -0,0 +1,90 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s + +# perf stat reports an IPC of 2.00 for this block of code. + +# All of the vector packed compares from this test are dependency breaking +# instructions. That means, there is no RAW dependency between any of the +# instructions, and the code can be fully parallelized in hardware. + +vpcmpeqb %xmm0, %xmm0, %xmm1 +vpcmpeqw %xmm1, %xmm1, %xmm2 +vpcmpeqd %xmm2, %xmm2, %xmm3 +vpcmpeqq %xmm3, %xmm3, %xmm0 + +# CHECK: Iterations: 1500 +# CHECK-NEXT: Instructions: 6000 +# CHECK-NEXT: Total Cycles: 6003 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 2.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.50 vpcmpeqb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: 1 1 0.50 vpcmpeqw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpcmpeqd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: 1 1 0.50 vpcmpeqq %xmm3, %xmm3, %xmm0 + +# CHECK: Resources: +# CHECK-NEXT: [0] - JALU0 +# CHECK-NEXT: [1] - JALU1 +# CHECK-NEXT: [2] - JDiv +# CHECK-NEXT: [3] - JFPA +# CHECK-NEXT: [4] - JFPM +# CHECK-NEXT: [5] - JFPU0 +# CHECK-NEXT: [6] - JFPU1 +# CHECK-NEXT: [7] - JLAGU +# CHECK-NEXT: [8] - JMul +# CHECK-NEXT: [9] - JSAGU +# CHECK-NEXT: [10] - JSTC +# CHECK-NEXT: [11] - JVALU0 +# CHECK-NEXT: [12] - JVALU1 +# CHECK-NEXT: [13] - JVIMUL + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: - - - - - 2.00 2.00 - - - - 2.00 2.00 - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: +# CHECK-NEXT: - - - - - - 1.00 - - - - - 1.00 - vpcmpeqb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: - - - - - 1.00 - - - - - 1.00 - - vpcmpeqw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - 1.00 - - - - - 1.00 - vpcmpeqd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: - - - - - 1.00 - - - - - 1.00 - - vpcmpeqq %xmm3, %xmm3, %xmm0 + +# CHECK: Timeline view: +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeER . . . vpcmpeqb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [0,1] D=eER. . . vpcmpeqw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: [0,2] .D=eER . . vpcmpeqd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [0,3] .D==eER . . vpcmpeqq %xmm3, %xmm3, %xmm0 +# CHECK-NEXT: [1,0] . D==eER . . vpcmpeqb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [1,1] . D===eER . . vpcmpeqw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: [1,2] . D===eER. . vpcmpeqd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [1,3] . D====eER . vpcmpeqq %xmm3, %xmm3, %xmm0 +# CHECK-NEXT: [2,0] . D====eER . vpcmpeqb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [2,1] . D=====eER . vpcmpeqw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: [2,2] . D=====eER. vpcmpeqd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [2,3] . D======eER vpcmpeqq %xmm3, %xmm3, %xmm0 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 3 3.0 0.3 0.0 vpcmpeqb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: 1. 3 4.0 0.0 0.0 vpcmpeqw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: 2. 3 4.0 0.0 0.0 vpcmpeqd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: 3. 3 5.0 0.0 0.0 vpcmpeqq %xmm3, %xmm3, %xmm0 diff --git a/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpgt.s b/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpgt.s new file mode 100644 index 00000000000..9ab8d039ccd --- /dev/null +++ b/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpgt.s @@ -0,0 +1,90 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s + +# perf stat reports an IPC of 2.00 for this block of code. + +# All of the vector packed compares from this test are zero idioms. These zero +# idioms are all detected and removed by the register renamer. That means, no +# uOp is executed, and there is no RAW dependency for any of the packed +# compares. + +vpcmpgtb %xmm0, %xmm0, %xmm1 +vpcmpgtw %xmm1, %xmm1, %xmm2 +vpcmpgtd %xmm2, %xmm2, %xmm3 +vpcmpgtq %xmm3, %xmm3, %xmm0 + +# CHECK: Iterations: 1500 +# CHECK-NEXT: Instructions: 6000 +# CHECK-NEXT: Total Cycles: 3001 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 2.00 +# CHECK-NEXT: Block RThroughput: 2.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 0 0.50 vpcmpgtb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: 1 0 0.50 vpcmpgtw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: 1 0 0.50 vpcmpgtd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: 1 0 0.50 vpcmpgtq %xmm3, %xmm3, %xmm0 + +# CHECK: Resources: +# CHECK-NEXT: [0] - JALU0 +# CHECK-NEXT: [1] - JALU1 +# CHECK-NEXT: [2] - JDiv +# CHECK-NEXT: [3] - JFPA +# CHECK-NEXT: [4] - JFPM +# CHECK-NEXT: [5] - JFPU0 +# CHECK-NEXT: [6] - JFPU1 +# CHECK-NEXT: [7] - JLAGU +# CHECK-NEXT: [8] - JMul +# CHECK-NEXT: [9] - JSAGU +# CHECK-NEXT: [10] - JSTC +# CHECK-NEXT: [11] - JVALU0 +# CHECK-NEXT: [12] - JVALU1 +# CHECK-NEXT: [13] - JVIMUL + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: - - - - - - - - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: +# CHECK-NEXT: - - - - - - - - - - - - - - vpcmpgtb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: - - - - - - - - - - - - - - vpcmpgtw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - - - vpcmpgtd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: - - - - - - - - - - - - - - vpcmpgtq %xmm3, %xmm3, %xmm0 + +# CHECK: Timeline view: +# CHECK-NEXT: Index 0123456 + +# CHECK: [0,0] DR .. vpcmpgtb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [0,1] DR .. vpcmpgtw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: [0,2] .DR .. vpcmpgtd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [0,3] .DR .. vpcmpgtq %xmm3, %xmm3, %xmm0 +# CHECK-NEXT: [1,0] . DR .. vpcmpgtb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [1,1] . DR .. vpcmpgtw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: [1,2] . DR.. vpcmpgtd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [1,3] . DR.. vpcmpgtq %xmm3, %xmm3, %xmm0 +# CHECK-NEXT: [2,0] . DR. vpcmpgtb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [2,1] . DR. vpcmpgtw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: [2,2] . DR vpcmpgtd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [2,3] . DR vpcmpgtq %xmm3, %xmm3, %xmm0 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 3 0.0 0.0 0.0 vpcmpgtb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: 1. 3 0.0 0.0 0.0 vpcmpgtw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: 2. 3 0.0 0.0 0.0 vpcmpgtd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: 3. 3 0.0 0.0 0.0 vpcmpgtq %xmm3, %xmm3, %xmm0 diff --git a/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-1.s b/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-1.s new file mode 100644 index 00000000000..34cabda553f --- /dev/null +++ b/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-1.s @@ -0,0 +1,75 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s + +# perf stat reports an IPC of 1.00 for this code block. + +# Although both SBB are dependency breaking instructions, there is still an +# implicit dependency on EFLAGS which limits the ILP. So, the hardware backend +# can only execute one instruction per cycle. + +sbb %edx, %edx +sbb %eax, %eax + +# CHECK: Iterations: 1500 +# CHECK-NEXT: Instructions: 3000 +# CHECK-NEXT: Total Cycles: 3003 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 2.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 1.00 sbbl %edx, %edx +# CHECK-NEXT: 1 1 1.00 sbbl %eax, %eax + +# CHECK: Resources: +# CHECK-NEXT: [0] - JALU0 +# CHECK-NEXT: [1] - JALU1 +# CHECK-NEXT: [2] - JDiv +# CHECK-NEXT: [3] - JFPA +# CHECK-NEXT: [4] - JFPM +# CHECK-NEXT: [5] - JFPU0 +# CHECK-NEXT: [6] - JFPU1 +# CHECK-NEXT: [7] - JLAGU +# CHECK-NEXT: [8] - JMul +# CHECK-NEXT: [9] - JSAGU +# CHECK-NEXT: [10] - JSTC +# CHECK-NEXT: [11] - JVALU0 +# CHECK-NEXT: [12] - JVALU1 +# CHECK-NEXT: [13] - JVIMUL + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: 2.00 2.00 - - - - - - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: +# CHECK-NEXT: - 2.00 - - - - - - - - - - - - sbbl %edx, %edx +# CHECK-NEXT: 2.00 - - - - - - - - - - - - - sbbl %eax, %eax + +# CHECK: Timeline view: +# CHECK-NEXT: Index 012345678 + +# CHECK: [0,0] DeER . . sbbl %edx, %edx +# CHECK-NEXT: [0,1] D=eER. . sbbl %eax, %eax +# CHECK-NEXT: [1,0] .D=eER . sbbl %edx, %edx +# CHECK-NEXT: [1,1] .D==eER . sbbl %eax, %eax +# CHECK-NEXT: [2,0] . D==eER. sbbl %edx, %edx +# CHECK-NEXT: [2,1] . D===eER sbbl %eax, %eax + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 3 2.0 0.3 0.0 sbbl %edx, %edx +# CHECK-NEXT: 1. 3 3.0 0.0 0.0 sbbl %eax, %eax diff --git a/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s b/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s new file mode 100644 index 00000000000..00b88954e48 --- /dev/null +++ b/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s @@ -0,0 +1,83 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s + +# perf stat reports a throughput of 1.51 IPC for this block of code. + +# The SBB does not depend on the value of register EAX. That means, it doesn't +# have to wait for the IMUL to write-back on EAX. However, it still depends on +# the ADD for EFLAGS. + +imul %edx, %eax +add %edx, %edx +sbb %eax, %eax + +# CHECK: Iterations: 1500 +# CHECK-NEXT: Instructions: 4500 +# CHECK-NEXT: Total Cycles: 6745 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.67 +# CHECK-NEXT: Block RThroughput: 2.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 2 3 1.00 imull %edx, %eax +# CHECK-NEXT: 1 1 0.50 addl %edx, %edx +# CHECK-NEXT: 1 1 1.00 sbbl %eax, %eax + +# CHECK: Resources: +# CHECK-NEXT: [0] - JALU0 +# CHECK-NEXT: [1] - JALU1 +# CHECK-NEXT: [2] - JDiv +# CHECK-NEXT: [3] - JFPA +# CHECK-NEXT: [4] - JFPM +# CHECK-NEXT: [5] - JFPU0 +# CHECK-NEXT: [6] - JFPU1 +# CHECK-NEXT: [7] - JLAGU +# CHECK-NEXT: [8] - JMul +# CHECK-NEXT: [9] - JSAGU +# CHECK-NEXT: [10] - JSTC +# CHECK-NEXT: [11] - JVALU0 +# CHECK-NEXT: [12] - JVALU1 +# CHECK-NEXT: [13] - JVIMUL + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: 2.01 1.99 - - - - - - 1.00 - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: +# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imull %edx, %eax +# CHECK-NEXT: 0.99 0.01 - - - - - - - - - - - - addl %edx, %edx +# CHECK-NEXT: 1.01 0.99 - - - - - - - - - - - - sbbl %eax, %eax + +# CHECK: Timeline view: +# CHECK-NEXT: 012345 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeER . . imull %edx, %eax +# CHECK-NEXT: [0,1] .DeE-R . . addl %edx, %edx +# CHECK-NEXT: [0,2] .D==eER . . sbbl %eax, %eax +# CHECK-NEXT: [1,0] . D===eeeER . imull %edx, %eax +# CHECK-NEXT: [1,1] . DeE----R . addl %edx, %edx +# CHECK-NEXT: [1,2] . D=====eER . sbbl %eax, %eax +# CHECK-NEXT: [2,0] . D=====eeeER. imull %edx, %eax +# CHECK-NEXT: [2,1] . DeE------R. addl %edx, %edx +# CHECK-NEXT: [2,2] . D=======eER sbbl %eax, %eax + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 3 3.7 0.7 0.0 imull %edx, %eax +# CHECK-NEXT: 1. 3 1.0 1.0 3.7 addl %edx, %edx +# CHECK-NEXT: 2. 3 5.7 0.0 0.0 sbbl %eax, %eax diff --git a/test/tools/llvm-mca/X86/BtVer2/one-idioms.s b/test/tools/llvm-mca/X86/BtVer2/one-idioms.s index e524fe15a8a..3beaf829c1a 100644 --- a/test/tools/llvm-mca/X86/BtVer2/one-idioms.s +++ b/test/tools/llvm-mca/X86/BtVer2/one-idioms.s @@ -6,7 +6,6 @@ pcmpeqb %mm2, %mm2 pcmpeqd %mm2, %mm2 -# pcmpeqq %mm2, %mm2 # invalid operand for instruction pcmpeqw %mm2, %mm2 pcmpeqb %xmm2, %xmm2