defm : X86WriteRes<WriteCMPXCHGRMW, [JALU01, JSAGU, JLAGU], 11, [3, 1, 1], 6>;
defm : X86WriteRes<WriteXCHG, [JALU01], 1, [2], 2>;
-defm : JWriteResIntPair<WriteIMul8, [JALU1, JMul], 3, [1, 1], 2>;
-defm : JWriteResIntPair<WriteIMul16, [JALU1, JMul], 3, [1, 1], 2>;
-defm : JWriteResIntPair<WriteIMul16Imm, [JALU1, JMul], 3, [1, 1], 2>;
-defm : JWriteResIntPair<WriteIMul16Reg, [JALU1, JMul], 3, [1, 1], 2>;
-defm : JWriteResIntPair<WriteIMul32, [JALU1, JMul], 3, [1, 1], 2>;
-defm : JWriteResIntPair<WriteIMul32Imm, [JALU1, JMul], 3, [1, 1], 2>;
-defm : JWriteResIntPair<WriteIMul32Reg, [JALU1, JMul], 3, [1, 1], 2>;
-defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>;
-defm : JWriteResIntPair<WriteIMul64Imm, [JALU1, JMul], 6, [1, 4], 2>;
-defm : JWriteResIntPair<WriteIMul64Reg, [JALU1, JMul], 6, [1, 4], 2>;
+defm : JWriteResIntPair<WriteIMul8, [JALU1, JMul], 3, [1, 1], 1>;
+defm : JWriteResIntPair<WriteIMul16, [JALU1, JMul], 3, [1, 3], 3>;
+defm : JWriteResIntPair<WriteIMul16Imm, [JALU1, JMul], 4, [1, 2], 2>;
+defm : JWriteResIntPair<WriteIMul16Reg, [JALU1, JMul], 3, [1, 1], 1>;
+defm : JWriteResIntPair<WriteIMul32, [JALU1, JMul], 3, [1, 2], 2>;
+defm : JWriteResIntPair<WriteIMul32Imm, [JALU1, JMul], 3, [1, 1], 1>;
+defm : JWriteResIntPair<WriteIMul32Reg, [JALU1, JMul], 3, [1, 1], 1>;
+defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>;
+defm : JWriteResIntPair<WriteIMul64Imm, [JALU1, JMul], 6, [1, 4], 1>;
+defm : JWriteResIntPair<WriteIMul64Reg, [JALU1, JMul], 6, [1, 4], 1>;
defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>;
defm : JWriteResIntPair<WriteDiv8, [JALU1, JDiv], 12, [1, 12], 1>;
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
-# CHECK-NEXT: Total Cycles: 656
-# CHECK-NEXT: Total uOps: 1100
+# CHECK-NEXT: Total Cycles: 655
+# CHECK-NEXT: Total uOps: 1000
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 1.68
+# CHECK-NEXT: uOps Per Cycle: 1.53
# CHECK-NEXT: IPC: 0.61
-# CHECK-NEXT: Block RThroughput: 5.5
+# CHECK-NEXT: Block RThroughput: 5.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 6 4.00 imulq $5, %rcx, %rax
+# CHECK-NEXT: 1 6 4.00 imulq $5, %rcx, %rax
# CHECK-NEXT: 1 1 0.50 lzcntl %ecx, %eax
# CHECK-NEXT: 1 1 0.50 andq %rcx, %rax
# CHECK-NEXT: 7 4 4.00 bsfq %rax, %rcx
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456
+# CHECK-NEXT: 012345
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeeeeeeER . .. imulq $5, %rcx, %rax
-# CHECK-NEXT: [0,1] .DeE----R . .. lzcntl %ecx, %eax
-# CHECK-NEXT: [0,2] .D=eE----R. .. andq %rcx, %rax
-# CHECK-NEXT: [0,3] . D=eeeeER. .. bsfq %rax, %rcx
-# CHECK-NEXT: [1,0] . .D=eeeeeeER. imulq $5, %rcx, %rax
-# CHECK-NEXT: [1,1] . . D=eE----R. lzcntl %ecx, %eax
-# CHECK-NEXT: [1,2] . . D==eE----R andq %rcx, %rax
-# CHECK-NEXT: [1,3] . . D==eeeeER bsfq %rax, %rcx
+# CHECK: [0,0] DeeeeeeER . . imulq $5, %rcx, %rax
+# CHECK-NEXT: [0,1] DeE-----R . . lzcntl %ecx, %eax
+# CHECK-NEXT: [0,2] .DeE-----R. . andq %rcx, %rax
+# CHECK-NEXT: [0,3] . DeeeeE-R. . bsfq %rax, %rcx
+# CHECK-NEXT: [1,0] . D=eeeeeeER. imulq $5, %rcx, %rax
+# CHECK-NEXT: [1,1] . .D=eE----R. lzcntl %ecx, %eax
+# CHECK-NEXT: [1,2] . .D==eE----R andq %rcx, %rax
+# CHECK-NEXT: [1,3] . . D==eeeeER bsfq %rax, %rcx
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 1.5 0.5 0.0 imulq $5, %rcx, %rax
-# CHECK-NEXT: 1. 2 1.5 1.0 4.0 lzcntl %ecx, %eax
-# CHECK-NEXT: 2. 2 2.5 0.0 4.0 andq %rcx, %rax
-# CHECK-NEXT: 3. 2 2.5 0.0 0.0 bsfq %rax, %rcx
+# CHECK-NEXT: 1. 2 1.5 1.0 4.5 lzcntl %ecx, %eax
+# CHECK-NEXT: 2. 2 2.0 0.0 4.5 andq %rcx, %rax
+# CHECK-NEXT: 3. 2 2.0 0.0 0.5 bsfq %rax, %rcx
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 17
-# CHECK-NEXT: Total uOps: 8
+# CHECK-NEXT: Total uOps: 7
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.47
+# CHECK-NEXT: uOps Per Cycle: 0.41
# CHECK-NEXT: IPC: 0.12
# CHECK-NEXT: Block RThroughput: 4.0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 6 4.00 imulq %rax, %rax
+# CHECK-NEXT: 1 6 4.00 imulq %rax, %rax
# CHECK-NEXT: 6 11 1.50 * * cmpxchgq %rcx, (%rdx)
# CHECK: Resources:
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 17
-# CHECK-NEXT: Total uOps: 8
+# CHECK-NEXT: Total uOps: 7
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.47
+# CHECK-NEXT: uOps Per Cycle: 0.41
# CHECK-NEXT: IPC: 0.12
# CHECK-NEXT: Block RThroughput: 4.0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 6 4.00 imulq %rcx, %rcx
+# CHECK-NEXT: 1 6 4.00 imulq %rcx, %rcx
# CHECK-NEXT: 6 11 1.50 * * cmpxchgq %rcx, (%rdx)
# CHECK: Resources:
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 23
-# CHECK-NEXT: Total uOps: 8
+# CHECK-NEXT: Total uOps: 7
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.35
+# CHECK-NEXT: uOps Per Cycle: 0.30
# CHECK-NEXT: IPC: 0.09
# CHECK-NEXT: Block RThroughput: 17.0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 6 4.00 imulq %rax, %rax
+# CHECK-NEXT: 1 6 4.00 imulq %rax, %rax
# CHECK-NEXT: 6 17 17.00 * * lock cmpxchgq %rcx, (%rdx)
# CHECK: Resources:
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 23
-# CHECK-NEXT: Total uOps: 8
+# CHECK-NEXT: Total uOps: 7
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.35
+# CHECK-NEXT: uOps Per Cycle: 0.30
# CHECK-NEXT: IPC: 0.09
# CHECK-NEXT: Block RThroughput: 17.0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 6 4.00 imulq %rcx, %rcx
+# CHECK-NEXT: 1 6 4.00 imulq %rcx, %rcx
# CHECK-NEXT: 6 17 17.00 * * lock cmpxchgq %rcx, (%rdx)
# CHECK: Resources:
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 4500
-# CHECK-NEXT: Total Cycles: 3007
-# CHECK-NEXT: Total uOps: 6000
+# CHECK-NEXT: Total Cycles: 3006
+# CHECK-NEXT: Total uOps: 4500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 2.00
+# CHECK-NEXT: uOps Per Cycle: 1.50
# CHECK-NEXT: IPC: 1.50
-# CHECK-NEXT: Block RThroughput: 2.0
+# CHECK-NEXT: Block RThroughput: 1.5
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 3 1.00 imull %edx, %eax
+# CHECK-NEXT: 1 3 1.00 imull %edx, %eax
# CHECK-NEXT: 1 1 0.50 addl %edx, %edx
# CHECK-NEXT: 1 1 1.00 sbbl %eax, %eax
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeER .. imull %edx, %eax
-# CHECK-NEXT: [0,1] .DeE-R .. addl %edx, %edx
-# CHECK-NEXT: [0,2] .D=eE-R .. sbbl %eax, %eax
-# CHECK-NEXT: [1,0] . D==eeeER.. imull %edx, %eax
-# CHECK-NEXT: [1,1] . DeE---R.. addl %edx, %edx
-# CHECK-NEXT: [1,2] . D=eE---R. sbbl %eax, %eax
-# CHECK-NEXT: [2,0] . D=eeeER. imull %edx, %eax
-# CHECK-NEXT: [2,1] . D=eE--R addl %edx, %edx
-# CHECK-NEXT: [2,2] . D==eE-R sbbl %eax, %eax
+# CHECK-NEXT: [0,1] DeE--R .. addl %edx, %edx
+# CHECK-NEXT: [0,2] .DeE--R .. sbbl %eax, %eax
+# CHECK-NEXT: [1,0] .D==eeeER .. imull %edx, %eax
+# CHECK-NEXT: [1,1] . DeE---R .. addl %edx, %edx
+# CHECK-NEXT: [1,2] . D=eE---R.. sbbl %eax, %eax
+# CHECK-NEXT: [2,0] . D==eeeER. imull %edx, %eax
+# CHECK-NEXT: [2,1] . D=eE---R. addl %edx, %edx
+# CHECK-NEXT: [2,2] . D=eE---R sbbl %eax, %eax
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 3 2.0 0.7 0.0 imull %edx, %eax
-# CHECK-NEXT: 1. 3 1.3 1.3 2.0 addl %edx, %edx
-# CHECK-NEXT: 2. 3 2.3 0.0 1.7 sbbl %eax, %eax
+# CHECK-NEXT: 0. 3 2.3 1.0 0.0 imull %edx, %eax
+# CHECK-NEXT: 1. 3 1.3 1.0 2.7 addl %edx, %edx
+# CHECK-NEXT: 2. 3 1.7 0.0 2.7 sbbl %eax, %eax
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 3
# CHECK-NEXT: Total Cycles: 11
-# CHECK-NEXT: Total uOps: 4
+# CHECK-NEXT: Total uOps: 3
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.36
+# CHECK-NEXT: uOps Per Cycle: 0.27
# CHECK-NEXT: IPC: 0.27
# CHECK-NEXT: Block RThroughput: 4.0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 6 4.00 imulq %rax, %rbx
+# CHECK-NEXT: 1 6 4.00 imulq %rax, %rbx
# CHECK-NEXT: 1 1 0.50 lzcntw %ax, %bx
# CHECK-NEXT: 1 1 0.50 addl %ecx, %ebx
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeER . imulq %rax, %rbx
-# CHECK-NEXT: [0,1] .D=====eER. lzcntw %ax, %bx
+# CHECK-NEXT: [0,1] D======eER. lzcntw %ax, %bx
# CHECK-NEXT: [0,2] .D======eER addl %ecx, %ebx
# CHECK: Average Wait times (based on the timeline view):
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulq %rax, %rbx
-# CHECK-NEXT: 1. 1 6.0 0.0 0.0 lzcntw %ax, %bx
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 lzcntw %ax, %bx
# CHECK-NEXT: 2. 1 7.0 0.0 0.0 addl %ecx, %ebx
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 4500
# CHECK-NEXT: Total Cycles: 7503
-# CHECK-NEXT: Total uOps: 6000
+# CHECK-NEXT: Total uOps: 4500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.80
+# CHECK-NEXT: uOps Per Cycle: 0.60
# CHECK-NEXT: IPC: 0.60
-# CHECK-NEXT: Block RThroughput: 2.0
+# CHECK-NEXT: Block RThroughput: 1.5
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 3 1.00 imulw %ax, %bx
+# CHECK-NEXT: 1 3 1.00 imulw %ax, %bx
# CHECK-NEXT: 1 1 0.50 lzcntw %ax, %bx
# CHECK-NEXT: 1 1 0.50 addw %cx, %bx
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeER . . . imulw %ax, %bx
-# CHECK-NEXT: [0,1] .D==eER . . . lzcntw %ax, %bx
+# CHECK-NEXT: [0,1] D===eER . . . lzcntw %ax, %bx
# CHECK-NEXT: [0,2] .D===eER . . . addw %cx, %bx
-# CHECK-NEXT: [1,0] . D===eeeER . . imulw %ax, %bx
-# CHECK-NEXT: [1,1] . D=====eER . . lzcntw %ax, %bx
-# CHECK-NEXT: [1,2] . D======eER . . addw %cx, %bx
-# CHECK-NEXT: [2,0] . D======eeeER . imulw %ax, %bx
-# CHECK-NEXT: [2,1] . D========eER. lzcntw %ax, %bx
-# CHECK-NEXT: [2,2] . D=========eER addw %cx, %bx
+# CHECK-NEXT: [1,0] .D====eeeER . . imulw %ax, %bx
+# CHECK-NEXT: [1,1] . D======eER . . lzcntw %ax, %bx
+# CHECK-NEXT: [1,2] . D=======eER . . addw %cx, %bx
+# CHECK-NEXT: [2,0] . D=======eeeER . imulw %ax, %bx
+# CHECK-NEXT: [2,1] . D==========eER. lzcntw %ax, %bx
+# CHECK-NEXT: [2,2] . D==========eER addw %cx, %bx
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 3 4.0 0.3 0.0 imulw %ax, %bx
-# CHECK-NEXT: 1. 3 6.0 0.0 0.0 lzcntw %ax, %bx
-# CHECK-NEXT: 2. 3 7.0 0.0 0.0 addw %cx, %bx
+# CHECK-NEXT: 0. 3 4.7 0.3 0.0 imulw %ax, %bx
+# CHECK-NEXT: 1. 3 7.3 0.0 0.0 lzcntw %ax, %bx
+# CHECK-NEXT: 2. 3 7.7 0.0 0.0 addw %cx, %bx
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 4500
-# CHECK-NEXT: Total Cycles: 7504
-# CHECK-NEXT: Total uOps: 6000
+# CHECK-NEXT: Total Cycles: 7503
+# CHECK-NEXT: Total uOps: 4500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.80
+# CHECK-NEXT: uOps Per Cycle: 0.60
# CHECK-NEXT: IPC: 0.60
# CHECK-NEXT: Block RThroughput: 2.0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 3 1.00 imull %edx, %ecx
+# CHECK-NEXT: 1 3 1.00 imull %edx, %ecx
# CHECK-NEXT: 1 4 1.00 * lzcntw (%rsp), %cx
# CHECK-NEXT: 1 4 1.00 * lzcntw 2(%rsp), %cx
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - lzcntw 2(%rsp), %cx
# CHECK: Timeline view:
-# CHECK-NEXT: 012345678
+# CHECK-NEXT: 01234567
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeeeER . . . imull %edx, %ecx
-# CHECK-NEXT: [0,1] .DeeeeER . . . lzcntw (%rsp), %cx
-# CHECK-NEXT: [0,2] .D=eeeeER . . . lzcntw 2(%rsp), %cx
-# CHECK-NEXT: [1,0] . D====eeeER . . imull %edx, %ecx
-# CHECK-NEXT: [1,1] . D===eeeeER . . lzcntw (%rsp), %cx
-# CHECK-NEXT: [1,2] . D====eeeeER . . lzcntw 2(%rsp), %cx
-# CHECK-NEXT: [2,0] . D=======eeeER . imull %edx, %ecx
-# CHECK-NEXT: [2,1] . D======eeeeER. lzcntw (%rsp), %cx
-# CHECK-NEXT: [2,2] . D=======eeeeER lzcntw 2(%rsp), %cx
+# CHECK: [0,0] DeeeER . . . imull %edx, %ecx
+# CHECK-NEXT: [0,1] DeeeeER . . . lzcntw (%rsp), %cx
+# CHECK-NEXT: [0,2] .DeeeeER . . . lzcntw 2(%rsp), %cx
+# CHECK-NEXT: [1,0] .D====eeeER . . imull %edx, %ecx
+# CHECK-NEXT: [1,1] . D===eeeeER . . lzcntw (%rsp), %cx
+# CHECK-NEXT: [1,2] . D====eeeeER . . lzcntw 2(%rsp), %cx
+# CHECK-NEXT: [2,0] . D=======eeeER . imull %edx, %ecx
+# CHECK-NEXT: [2,1] . D=======eeeeER. lzcntw (%rsp), %cx
+# CHECK-NEXT: [2,2] . D=======eeeeER lzcntw 2(%rsp), %cx
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 4.7 0.3 0.0 imull %edx, %ecx
-# CHECK-NEXT: 1. 3 4.0 0.3 0.0 lzcntw (%rsp), %cx
-# CHECK-NEXT: 2. 3 5.0 0.0 0.0 lzcntw 2(%rsp), %cx
+# CHECK-NEXT: 1. 3 4.3 0.0 0.0 lzcntw (%rsp), %cx
+# CHECK-NEXT: 2. 3 4.7 0.0 0.0 lzcntw 2(%rsp), %cx
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 500
# CHECK-NEXT: Total Cycles: 504
-# CHECK-NEXT: Total uOps: 600
+# CHECK-NEXT: Total uOps: 500
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 1.19
+# CHECK-NEXT: uOps Per Cycle: 0.99
# CHECK-NEXT: IPC: 0.99
-# CHECK-NEXT: Block RThroughput: 3.0
+# CHECK-NEXT: Block RThroughput: 2.5
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: 1 1 0.50 sete %r9b
# CHECK-NEXT: 1 1 0.50 movzbl %al, %eax
# CHECK-NEXT: 1 1 0.50 shll $2, %eax
-# CHECK-NEXT: 2 3 1.00 imull %ecx, %eax
+# CHECK-NEXT: 1 3 1.00 imull %ecx, %eax
# CHECK-NEXT: 1 1 0.50 cmpl $1025, %eax
# CHECK: Resources:
# CHECK: [0,0] DeER . . . . . . sete %r9b
# CHECK-NEXT: [0,1] DeER . . . . . . movzbl %al, %eax
# CHECK-NEXT: [0,2] .DeER. . . . . . shll $2, %eax
-# CHECK-NEXT: [0,3] . DeeeER . . . . . imull %ecx, %eax
-# CHECK-NEXT: [0,4] . D==eER . . . . . cmpl $1025, %eax
-# CHECK-NEXT: [1,0] . D===eER. . . . . sete %r9b
-# CHECK-NEXT: [1,1] . D=eE-R. . . . . movzbl %al, %eax
-# CHECK-NEXT: [1,2] . D==eE-R . . . . shll $2, %eax
-# CHECK-NEXT: [1,3] . D==eeeER . . . . imull %ecx, %eax
-# CHECK-NEXT: [1,4] . .D====eER . . . . cmpl $1025, %eax
-# CHECK-NEXT: [2,0] . .D=====eER. . . . sete %r9b
-# CHECK-NEXT: [2,1] . . D===eE-R. . . . movzbl %al, %eax
-# CHECK-NEXT: [2,2] . . D====eE-R . . . shll $2, %eax
-# CHECK-NEXT: [2,3] . . D====eeeER . . . imull %ecx, %eax
-# CHECK-NEXT: [2,4] . . D======eER . . . cmpl $1025, %eax
-# CHECK-NEXT: [3,0] . . D=======eER. . . sete %r9b
-# CHECK-NEXT: [3,1] . . D=====eE-R. . . movzbl %al, %eax
-# CHECK-NEXT: [3,2] . . D======eE-R . . shll $2, %eax
-# CHECK-NEXT: [3,3] . . .D======eeeER . . imull %ecx, %eax
-# CHECK-NEXT: [3,4] . . . D========eER . . cmpl $1025, %eax
-# CHECK-NEXT: [4,0] . . . D=========eER. . sete %r9b
-# CHECK-NEXT: [4,1] . . . D=======eE-R. . movzbl %al, %eax
-# CHECK-NEXT: [4,2] . . . D========eE-R . shll $2, %eax
-# CHECK-NEXT: [4,3] . . . D========eeeER. imull %ecx, %eax
-# CHECK-NEXT: [4,4] . . . D==========eER cmpl $1025, %eax
+# CHECK-NEXT: [0,3] .D=eeeER . . . . . imull %ecx, %eax
+# CHECK-NEXT: [0,4] . D===eER . . . . . cmpl $1025, %eax
+# CHECK-NEXT: [1,0] . D====eER. . . . . sete %r9b
+# CHECK-NEXT: [1,1] . D==eE-R. . . . . movzbl %al, %eax
+# CHECK-NEXT: [1,2] . D===eE-R . . . . shll $2, %eax
+# CHECK-NEXT: [1,3] . D===eeeER . . . . imull %ecx, %eax
+# CHECK-NEXT: [1,4] . D======eER . . . . cmpl $1025, %eax
+# CHECK-NEXT: [2,0] . D======eER. . . . sete %r9b
+# CHECK-NEXT: [2,1] . D=====eE-R. . . . movzbl %al, %eax
+# CHECK-NEXT: [2,2] . .D=====eE-R . . . shll $2, %eax
+# CHECK-NEXT: [2,3] . .D======eeeER . . . imull %ecx, %eax
+# CHECK-NEXT: [2,4] . . D========eER . . . cmpl $1025, %eax
+# CHECK-NEXT: [3,0] . . D=========eER. . . sete %r9b
+# CHECK-NEXT: [3,1] . . D=======eE-R. . . movzbl %al, %eax
+# CHECK-NEXT: [3,2] . . D========eE-R . . shll $2, %eax
+# CHECK-NEXT: [3,3] . . D========eeeER . . imull %ecx, %eax
+# CHECK-NEXT: [3,4] . . D===========eER . . cmpl $1025, %eax
+# CHECK-NEXT: [4,0] . . D===========eER. . sete %r9b
+# CHECK-NEXT: [4,1] . . D==========eE-R. . movzbl %al, %eax
+# CHECK-NEXT: [4,2] . . .D==========eE-R . shll $2, %eax
+# CHECK-NEXT: [4,3] . . .D===========eeeER. imull %ecx, %eax
+# CHECK-NEXT: [4,4] . . . D=============eER cmpl $1025, %eax
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 5 5.8 0.2 0.0 sete %r9b
-# CHECK-NEXT: 1. 5 4.2 0.2 0.8 movzbl %al, %eax
-# CHECK-NEXT: 2. 5 5.0 0.0 0.8 shll $2, %eax
-# CHECK-NEXT: 3. 5 5.0 0.0 0.0 imull %ecx, %eax
-# CHECK-NEXT: 4. 5 7.0 0.0 0.0 cmpl $1025, %eax
+# CHECK-NEXT: 0. 5 7.0 0.2 0.0 sete %r9b
+# CHECK-NEXT: 1. 5 5.8 0.2 0.8 movzbl %al, %eax
+# CHECK-NEXT: 2. 5 6.2 0.0 0.8 shll $2, %eax
+# CHECK-NEXT: 3. 5 6.8 0.0 0.0 imull %ecx, %eax
+# CHECK-NEXT: 4. 5 9.2 0.0 0.0 cmpl $1025, %eax
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 3
# CHECK-NEXT: Total Cycles: 8
-# CHECK-NEXT: Total uOps: 4
+# CHECK-NEXT: Total uOps: 3
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: uOps Per Cycle: 0.38
# CHECK-NEXT: IPC: 0.38
-# CHECK-NEXT: Block RThroughput: 2.0
+# CHECK-NEXT: Block RThroughput: 1.5
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 3 1.00 imulw %ax, %cx
+# CHECK-NEXT: 1 3 1.00 imulw %ax, %cx
# CHECK-NEXT: 1 1 0.50 addb %al, %cl
# CHECK-NEXT: 1 1 0.50 addl %ecx, %ebx
# CHECK-NEXT: Index 01234567
# CHECK: [0,0] DeeeER . imulw %ax, %cx
-# CHECK-NEXT: [0,1] .D==eER. addb %al, %cl
+# CHECK-NEXT: [0,1] D===eER. addb %al, %cl
# CHECK-NEXT: [0,2] .D===eER addl %ecx, %ebx
# CHECK: Average Wait times (based on the timeline view):
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulw %ax, %cx
-# CHECK-NEXT: 1. 1 3.0 0.0 0.0 addb %al, %cl
+# CHECK-NEXT: 1. 1 4.0 0.0 0.0 addb %al, %cl
# CHECK-NEXT: 2. 1 4.0 0.0 0.0 addl %ecx, %ebx
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 10
+# CHECK-NEXT: Total Cycles: 11
# CHECK-NEXT: Total uOps: 4
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.40
-# CHECK-NEXT: IPC: 0.20
-# CHECK-NEXT: Block RThroughput: 2.0
+# CHECK-NEXT: uOps Per Cycle: 0.36
+# CHECK-NEXT: IPC: 0.18
+# CHECK-NEXT: Block RThroughput: 4.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 3 1.00 imull %esi
-# CHECK-NEXT: 2 6 1.00 * imull (%rdi)
+# CHECK-NEXT: 2 3 2.00 imull %esi
+# CHECK-NEXT: 2 6 2.00 * imull (%rdi)
# CHECK: Timeline view:
+# CHECK-NEXT: 0
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeeeER . imull %esi
-# CHECK-NEXT: [0,1] .DeeeeeeER imull (%rdi)
+# CHECK: [0,0] DeeeER . imull %esi
+# CHECK-NEXT: [0,1] .D=eeeeeeER imull (%rdi)
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imull %esi
-# CHECK-NEXT: 1. 1 1.0 1.0 0.0 imull (%rdi)
+# CHECK-NEXT: 1. 1 2.0 2.0 0.0 imull (%rdi)
# CHECK-NEXT: 2 28 25.00 * U idivl (%rax)
# CHECK-NEXT: 2 41 41.00 U idivq %rcx
# CHECK-NEXT: 2 44 41.00 * U idivq (%rax)
-# CHECK-NEXT: 2 3 1.00 imulb %dil
-# CHECK-NEXT: 2 6 1.00 * imulb (%rax)
-# CHECK-NEXT: 2 3 1.00 imulw %di
-# CHECK-NEXT: 2 6 1.00 * imulw (%rax)
-# CHECK-NEXT: 2 3 1.00 imulw %si, %di
-# CHECK-NEXT: 2 6 1.00 * imulw (%rax), %di
-# CHECK-NEXT: 2 3 1.00 imulw $511, %si, %di
-# CHECK-NEXT: 2 6 1.00 * imulw $511, (%rax), %di
-# CHECK-NEXT: 2 3 1.00 imulw $7, %si, %di
-# CHECK-NEXT: 2 6 1.00 * imulw $7, (%rax), %di
-# CHECK-NEXT: 2 3 1.00 imull %edi
-# CHECK-NEXT: 2 6 1.00 * imull (%rax)
-# CHECK-NEXT: 2 3 1.00 imull %esi, %edi
-# CHECK-NEXT: 2 6 1.00 * imull (%rax), %edi
-# CHECK-NEXT: 2 3 1.00 imull $665536, %esi, %edi
-# CHECK-NEXT: 2 6 1.00 * imull $665536, (%rax), %edi
-# CHECK-NEXT: 2 3 1.00 imull $7, %esi, %edi
-# CHECK-NEXT: 2 6 1.00 * imull $7, (%rax), %edi
+# CHECK-NEXT: 1 3 1.00 imulb %dil
+# CHECK-NEXT: 1 6 1.00 * imulb (%rax)
+# CHECK-NEXT: 3 3 3.00 imulw %di
+# CHECK-NEXT: 3 6 3.00 * imulw (%rax)
+# CHECK-NEXT: 1 3 1.00 imulw %si, %di
+# CHECK-NEXT: 1 6 1.00 * imulw (%rax), %di
+# CHECK-NEXT: 2 4 2.00 imulw $511, %si, %di
+# CHECK-NEXT: 2 7 2.00 * imulw $511, (%rax), %di
+# CHECK-NEXT: 2 4 2.00 imulw $7, %si, %di
+# CHECK-NEXT: 2 7 2.00 * imulw $7, (%rax), %di
+# CHECK-NEXT: 2 3 2.00 imull %edi
+# CHECK-NEXT: 2 6 2.00 * imull (%rax)
+# CHECK-NEXT: 1 3 1.00 imull %esi, %edi
+# CHECK-NEXT: 1 6 1.00 * imull (%rax), %edi
+# CHECK-NEXT: 1 3 1.00 imull $665536, %esi, %edi
+# CHECK-NEXT: 1 6 1.00 * imull $665536, (%rax), %edi
+# CHECK-NEXT: 1 3 1.00 imull $7, %esi, %edi
+# CHECK-NEXT: 1 6 1.00 * imull $7, (%rax), %edi
# CHECK-NEXT: 2 6 4.00 imulq %rdi
# CHECK-NEXT: 2 9 4.00 * imulq (%rax)
-# CHECK-NEXT: 2 6 4.00 imulq %rsi, %rdi
-# CHECK-NEXT: 2 9 4.00 * imulq (%rax), %rdi
-# CHECK-NEXT: 2 6 4.00 imulq $665536, %rsi, %rdi
-# CHECK-NEXT: 2 9 4.00 * imulq $665536, (%rax), %rdi
-# CHECK-NEXT: 2 6 4.00 imulq $7, %rsi, %rdi
-# CHECK-NEXT: 2 9 4.00 * imulq $7, (%rax), %rdi
+# CHECK-NEXT: 1 6 4.00 imulq %rsi, %rdi
+# CHECK-NEXT: 1 9 4.00 * imulq (%rax), %rdi
+# CHECK-NEXT: 1 6 4.00 imulq $665536, %rsi, %rdi
+# CHECK-NEXT: 1 9 4.00 * imulq $665536, (%rax), %rdi
+# CHECK-NEXT: 1 6 4.00 imulq $7, %rsi, %rdi
+# CHECK-NEXT: 1 9 4.00 * imulq $7, (%rax), %rdi
# CHECK-NEXT: 1 100 0.50 U inb $7, %al
# CHECK-NEXT: 1 100 0.50 U inb %dx, %al
# CHECK-NEXT: 1 100 0.50 U inw $7, %ax
# CHECK-NEXT: 1 4 1.00 * movzwq (%rax), %rdi
# CHECK-NEXT: 1 1 0.50 movslq %eax, %rdi
# CHECK-NEXT: 1 4 1.00 * movslq (%rax), %rdi
-# CHECK-NEXT: 2 3 1.00 mulb %dil
-# CHECK-NEXT: 2 6 1.00 * mulb (%rax)
-# CHECK-NEXT: 2 3 1.00 mulw %si
-# CHECK-NEXT: 2 6 1.00 * mulw (%rax)
-# CHECK-NEXT: 2 3 1.00 mull %edx
-# CHECK-NEXT: 2 6 1.00 * mull (%rax)
+# CHECK-NEXT: 1 3 1.00 mulb %dil
+# CHECK-NEXT: 1 6 1.00 * mulb (%rax)
+# CHECK-NEXT: 3 3 3.00 mulw %si
+# CHECK-NEXT: 3 6 3.00 * mulw (%rax)
+# CHECK-NEXT: 2 3 2.00 mull %edx
+# CHECK-NEXT: 2 6 2.00 * mull (%rax)
# CHECK-NEXT: 2 6 4.00 mulq %rcx
# CHECK-NEXT: 2 9 4.00 * mulq (%rax)
# CHECK-NEXT: 1 1 0.50 negb %dil
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
-# CHECK-NEXT: 722.50 772.50 380.00 - - - - 992.00 64.00 893.00 - - - -
+# CHECK-NEXT: 722.50 772.50 380.00 - - - - 992.00 80.00 893.00 - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - 1.00 41.00 - - - - 1.00 - - - - - - idivq (%rax)
# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imulb %dil
# CHECK-NEXT: - 1.00 - - - - - 1.00 1.00 - - - - - imulb (%rax)
-# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imulw %di
-# CHECK-NEXT: - 1.00 - - - - - 1.00 1.00 - - - - - imulw (%rax)
+# CHECK-NEXT: - 1.00 - - - - - - 3.00 - - - - - imulw %di
+# CHECK-NEXT: - 1.00 - - - - - 1.00 3.00 - - - - - imulw (%rax)
# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imulw %si, %di
# CHECK-NEXT: - 1.00 - - - - - 1.00 1.00 - - - - - imulw (%rax), %di
-# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imulw $511, %si, %di
-# CHECK-NEXT: - 1.00 - - - - - 1.00 1.00 - - - - - imulw $511, (%rax), %di
-# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imulw $7, %si, %di
-# CHECK-NEXT: - 1.00 - - - - - 1.00 1.00 - - - - - imulw $7, (%rax), %di
-# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imull %edi
-# CHECK-NEXT: - 1.00 - - - - - 1.00 1.00 - - - - - imull (%rax)
+# CHECK-NEXT: - 1.00 - - - - - - 2.00 - - - - - imulw $511, %si, %di
+# CHECK-NEXT: - 1.00 - - - - - 1.00 2.00 - - - - - imulw $511, (%rax), %di
+# CHECK-NEXT: - 1.00 - - - - - - 2.00 - - - - - imulw $7, %si, %di
+# CHECK-NEXT: - 1.00 - - - - - 1.00 2.00 - - - - - imulw $7, (%rax), %di
+# CHECK-NEXT: - 1.00 - - - - - - 2.00 - - - - - imull %edi
+# CHECK-NEXT: - 1.00 - - - - - 1.00 2.00 - - - - - imull (%rax)
# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imull %esi, %edi
# CHECK-NEXT: - 1.00 - - - - - 1.00 1.00 - - - - - imull (%rax), %edi
# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imull $665536, %esi, %edi
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - movslq (%rax), %rdi
# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - mulb %dil
# CHECK-NEXT: - 1.00 - - - - - 1.00 1.00 - - - - - mulb (%rax)
-# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - mulw %si
-# CHECK-NEXT: - 1.00 - - - - - 1.00 1.00 - - - - - mulw (%rax)
-# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - mull %edx
-# CHECK-NEXT: - 1.00 - - - - - 1.00 1.00 - - - - - mull (%rax)
+# CHECK-NEXT: - 1.00 - - - - - - 3.00 - - - - - mulw %si
+# CHECK-NEXT: - 1.00 - - - - - 1.00 3.00 - - - - - mulw (%rax)
+# CHECK-NEXT: - 1.00 - - - - - - 2.00 - - - - - mull %edx
+# CHECK-NEXT: - 1.00 - - - - - 1.00 2.00 - - - - - mull (%rax)
# CHECK-NEXT: - 1.00 - - - - - - 4.00 - - - - - mulq %rcx
# CHECK-NEXT: - 1.00 - - - - - 1.00 4.00 - - - - - mulq (%rax)
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - negb %dil
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 10
# CHECK-NEXT: Total Cycles: 27
-# CHECK-NEXT: Total uOps: 20
+# CHECK-NEXT: Total uOps: 16
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.74
+# CHECK-NEXT: uOps Per Cycle: 0.59
# CHECK-NEXT: IPC: 0.37
-# CHECK-NEXT: Block RThroughput: 5.0
+# CHECK-NEXT: Block RThroughput: 4.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: 4 11 1.50 * * xaddl %ecx, (%rsp)
# CHECK-NEXT: 1 1 0.50 addl %ecx, %ecx
# CHECK-NEXT: 1 1 0.50 addl %ecx, %ecx
-# CHECK-NEXT: 2 3 1.00 imull %ecx, %ecx
-# CHECK-NEXT: 2 3 1.00 imull %ecx, %ecx
+# CHECK-NEXT: 1 3 1.00 imull %ecx, %ecx
+# CHECK-NEXT: 1 3 1.00 imull %ecx, %ecx
# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [0,1] . D=eE-------R . . .. addl %ecx, %ecx
# CHECK-NEXT: [0,2] . D==eE-------R. . .. addl %ecx, %ecx
# CHECK-NEXT: [0,3] . D==eeeE----R. . .. imull %ecx, %ecx
-# CHECK-NEXT: [0,4] . D====eeeE--R . .. imull %ecx, %ecx
-# CHECK-NEXT: [1,0] . D======eeeeeeeeeeeER.. xaddl %ecx, (%rsp)
-# CHECK-NEXT: [1,1] . . D=======eE-------R.. addl %ecx, %ecx
-# CHECK-NEXT: [1,2] . . D========eE-------R. addl %ecx, %ecx
-# CHECK-NEXT: [1,3] . . D========eeeE----R. imull %ecx, %ecx
-# CHECK-NEXT: [1,4] . . D==========eeeE--R imull %ecx, %ecx
+# CHECK-NEXT: [0,4] . D=====eeeE--R . .. imull %ecx, %ecx
+# CHECK-NEXT: [1,0] . D=======eeeeeeeeeeeER.. xaddl %ecx, (%rsp)
+# CHECK-NEXT: [1,1] . .D========eE-------R.. addl %ecx, %ecx
+# CHECK-NEXT: [1,2] . .D=========eE-------R. addl %ecx, %ecx
+# CHECK-NEXT: [1,3] . . D=========eeeE----R. imull %ecx, %ecx
+# CHECK-NEXT: [1,4] . . D============eeeE--R imull %ecx, %ecx
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 2 4.0 0.5 0.0 xaddl %ecx, (%rsp)
-# CHECK-NEXT: 1. 2 5.0 0.0 7.0 addl %ecx, %ecx
-# CHECK-NEXT: 2. 2 6.0 0.0 7.0 addl %ecx, %ecx
-# CHECK-NEXT: 3. 2 6.0 0.0 4.0 imull %ecx, %ecx
-# CHECK-NEXT: 4. 2 8.0 0.0 2.0 imull %ecx, %ecx
+# CHECK-NEXT: 0. 2 4.5 0.5 0.0 xaddl %ecx, (%rsp)
+# CHECK-NEXT: 1. 2 5.5 0.0 7.0 addl %ecx, %ecx
+# CHECK-NEXT: 2. 2 6.5 0.0 7.0 addl %ecx, %ecx
+# CHECK-NEXT: 3. 2 6.5 0.0 4.0 imull %ecx, %ecx
+# CHECK-NEXT: 4. 2 9.5 0.0 2.0 imull %ecx, %ecx
# CHECK: [1] Code Region
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 10
# CHECK-NEXT: Total Cycles: 38
-# CHECK-NEXT: Total uOps: 20
+# CHECK-NEXT: Total uOps: 16
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.53
+# CHECK-NEXT: uOps Per Cycle: 0.42
# CHECK-NEXT: IPC: 0.26
# CHECK-NEXT: Block RThroughput: 16.0
# CHECK-NEXT: 4 16 16.00 * * lock xaddl %ecx, (%rsp)
# CHECK-NEXT: 1 1 0.50 addl %ecx, %ecx
# CHECK-NEXT: 1 1 0.50 addl %ecx, %ecx
-# CHECK-NEXT: 2 3 1.00 imull %ecx, %ecx
-# CHECK-NEXT: 2 3 1.00 imull %ecx, %ecx
+# CHECK-NEXT: 1 3 1.00 imull %ecx, %ecx
+# CHECK-NEXT: 1 3 1.00 imull %ecx, %ecx
# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [0,1] . D=========eE----R . . . . . addl %ecx, %ecx
# CHECK-NEXT: [0,2] . D==========eE----R. . . . . addl %ecx, %ecx
# CHECK-NEXT: [0,3] . D==========eeeE-R. . . . . imull %ecx, %ecx
-# CHECK-NEXT: [0,4] . D============eeeER . . . . imull %ecx, %ecx
-# CHECK-NEXT: [1,0] . D===========eeeeeeeeeeeeeeeeER. . lock xaddl %ecx, (%rsp)
-# CHECK-NEXT: [1,1] . . D====================eE----R. . addl %ecx, %ecx
-# CHECK-NEXT: [1,2] . . D=====================eE----R . addl %ecx, %ecx
-# CHECK-NEXT: [1,3] . . D=====================eeeE-R . imull %ecx, %ecx
-# CHECK-NEXT: [1,4] . . D=======================eeeER imull %ecx, %ecx
+# CHECK-NEXT: [0,4] . D=============eeeER . . . . imull %ecx, %ecx
+# CHECK-NEXT: [1,0] . D============eeeeeeeeeeeeeeeeER. . lock xaddl %ecx, (%rsp)
+# CHECK-NEXT: [1,1] . .D=====================eE----R. . addl %ecx, %ecx
+# CHECK-NEXT: [1,2] . .D======================eE----R . addl %ecx, %ecx
+# CHECK-NEXT: [1,3] . . D======================eeeE-R . imull %ecx, %ecx
+# CHECK-NEXT: [1,4] . . D=========================eeeER imull %ecx, %ecx
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 2 6.5 0.5 0.0 lock xaddl %ecx, (%rsp)
-# CHECK-NEXT: 1. 2 15.5 0.0 4.0 addl %ecx, %ecx
-# CHECK-NEXT: 2. 2 16.5 0.0 4.0 addl %ecx, %ecx
-# CHECK-NEXT: 3. 2 16.5 0.0 1.0 imull %ecx, %ecx
-# CHECK-NEXT: 4. 2 18.5 0.0 0.0 imull %ecx, %ecx
+# CHECK-NEXT: 0. 2 7.0 0.5 0.0 lock xaddl %ecx, (%rsp)
+# CHECK-NEXT: 1. 2 16.0 0.0 4.0 addl %ecx, %ecx
+# CHECK-NEXT: 2. 2 17.0 0.0 4.0 addl %ecx, %ecx
+# CHECK-NEXT: 3. 2 17.0 0.0 1.0 imull %ecx, %ecx
+# CHECK-NEXT: 4. 2 20.0 0.0 0.0 imull %ecx, %ecx
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 10
# CHECK-NEXT: Total Cycles: 38
-# CHECK-NEXT: Total uOps: 18
+# CHECK-NEXT: Total uOps: 14
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.47
+# CHECK-NEXT: uOps Per Cycle: 0.37
# CHECK-NEXT: IPC: 0.26
# CHECK-NEXT: Block RThroughput: 16.0
# CHECK-NEXT: 3 16 16.00 * * xchgl %ecx, (%rsp)
# CHECK-NEXT: 1 1 0.50 addl %ecx, %ecx
# CHECK-NEXT: 1 1 0.50 addl %ecx, %ecx
-# CHECK-NEXT: 2 3 1.00 imull %ecx, %ecx
-# CHECK-NEXT: 2 3 1.00 imull %ecx, %ecx
+# CHECK-NEXT: 1 3 1.00 imull %ecx, %ecx
+# CHECK-NEXT: 1 3 1.00 imull %ecx, %ecx
# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK: [0,0] DeeeeeeeeeeeeeeeeER . . . . . xchgl %ecx, (%rsp)
# CHECK-NEXT: [0,1] .D==========eE----R . . . . . addl %ecx, %ecx
# CHECK-NEXT: [0,2] . D==========eE----R. . . . . addl %ecx, %ecx
-# CHECK-NEXT: [0,3] . D==========eeeE-R. . . . . imull %ecx, %ecx
-# CHECK-NEXT: [0,4] . D============eeeER . . . . imull %ecx, %ecx
-# CHECK-NEXT: [1,0] . D===========eeeeeeeeeeeeeeeeER. . xchgl %ecx, (%rsp)
-# CHECK-NEXT: [1,1] . .D=====================eE----R. . addl %ecx, %ecx
-# CHECK-NEXT: [1,2] . . D=====================eE----R . addl %ecx, %ecx
-# CHECK-NEXT: [1,3] . . D=====================eeeE-R . imull %ecx, %ecx
-# CHECK-NEXT: [1,4] . . D=======================eeeER imull %ecx, %ecx
+# CHECK-NEXT: [0,3] . D===========eeeE-R. . . . . imull %ecx, %ecx
+# CHECK-NEXT: [0,4] . D=============eeeER . . . . imull %ecx, %ecx
+# CHECK-NEXT: [1,0] . D============eeeeeeeeeeeeeeeeER. . xchgl %ecx, (%rsp)
+# CHECK-NEXT: [1,1] . D======================eE----R. . addl %ecx, %ecx
+# CHECK-NEXT: [1,2] . .D======================eE----R . addl %ecx, %ecx
+# CHECK-NEXT: [1,3] . .D=======================eeeE-R . imull %ecx, %ecx
+# CHECK-NEXT: [1,4] . . D=========================eeeER imull %ecx, %ecx
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 2 6.5 0.5 0.0 xchgl %ecx, (%rsp)
-# CHECK-NEXT: 1. 2 16.5 0.0 4.0 addl %ecx, %ecx
-# CHECK-NEXT: 2. 2 16.5 0.0 4.0 addl %ecx, %ecx
-# CHECK-NEXT: 3. 2 16.5 0.0 1.0 imull %ecx, %ecx
-# CHECK-NEXT: 4. 2 18.5 0.0 0.0 imull %ecx, %ecx
+# CHECK-NEXT: 0. 2 7.0 0.5 0.0 xchgl %ecx, (%rsp)
+# CHECK-NEXT: 1. 2 17.0 0.0 4.0 addl %ecx, %ecx
+# CHECK-NEXT: 2. 2 17.0 0.0 4.0 addl %ecx, %ecx
+# CHECK-NEXT: 3. 2 18.0 0.0 1.0 imull %ecx, %ecx
+# CHECK-NEXT: 4. 2 20.0 0.0 0.0 imull %ecx, %ecx
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 400
-# ALL-NEXT: Total Cycles: 305
-# ALL-NEXT: Total uOps: 500
+# ALL-NEXT: Total Cycles: 306
+# ALL-NEXT: Total uOps: 400
# ALL: Dispatch Width: 2
-# ALL-NEXT: uOps Per Cycle: 1.64
+# ALL-NEXT: uOps Per Cycle: 1.31
# ALL-NEXT: IPC: 1.31
-# ALL-NEXT: Block RThroughput: 2.5
+# ALL-NEXT: Block RThroughput: 2.0
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ATT-NEXT: 1 1 0.50 movl $1, %eax
# ATT-NEXT: 1 1 0.50 movl $255, %ebx
-# ATT-NEXT: 2 3 1.00 imull %edi, %esi
+# ATT-NEXT: 1 3 1.00 imull %edi, %esi
# ATT-NEXT: 1 1 0.50 leal (%rsi,%rdi), %eax
# INTEL-NEXT: 1 1 0.50 mov eax, 1
# INTEL-NEXT: 1 1 0.50 mov ebx, 255
-# INTEL-NEXT: 2 3 1.00 imul esi, edi
+# INTEL-NEXT: 1 3 1.00 imul esi, edi
# INTEL-NEXT: 1 1 0.50 lea eax, [rsi + rdi]
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 200
-# CHECK-NEXT: Total Cycles: 205
-# CHECK-NEXT: Total uOps: 300
+# CHECK-NEXT: Total Cycles: 106
+# CHECK-NEXT: Total uOps: 200
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 1.46
-# CHECK-NEXT: IPC: 0.98
-# CHECK-NEXT: Block RThroughput: 1.5
+# CHECK-NEXT: uOps Per Cycle: 1.89
+# CHECK-NEXT: IPC: 1.89
+# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.50 leal 42(%rdi), %eax
-# CHECK-NEXT: 2 3 1.00 imull %esi, %eax
+# CHECK-NEXT: 1 3 1.00 imull %esi, %eax
# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 200
-# CHECK-NEXT: Total Cycles: 204
-# CHECK-NEXT: Total uOps: 300
+# CHECK-NEXT: Total Cycles: 105
+# CHECK-NEXT: Total uOps: 200
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 1.47
-# CHECK-NEXT: IPC: 0.98
-# CHECK-NEXT: Block RThroughput: 1.5
+# CHECK-NEXT: uOps Per Cycle: 1.90
+# CHECK-NEXT: IPC: 1.90
+# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 3 1.00 imull %esi, %eax
+# CHECK-NEXT: 1 3 1.00 imull %esi, %eax
# CHECK-NEXT: 1 1 0.50 leal 42(%rdi), %eax
# CHECK: Resources:
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 300
-# CHECK-NEXT: Total Cycles: 205
-# CHECK-NEXT: Total uOps: 400
+# CHECK-NEXT: Total Cycles: 156
+# CHECK-NEXT: Total uOps: 300
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 1.95
-# CHECK-NEXT: IPC: 1.46
-# CHECK-NEXT: Block RThroughput: 2.0
+# CHECK-NEXT: uOps Per Cycle: 1.92
+# CHECK-NEXT: IPC: 1.92
+# CHECK-NEXT: Block RThroughput: 1.5
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.50 leal 42(%rdi), %eax
-# CHECK-NEXT: 2 3 1.00 imull %esi, %eax
+# CHECK-NEXT: 1 3 1.00 imull %esi, %eax
# CHECK-NEXT: 1 1 0.50 leal 42(%rdi), %eax
# CHECK: Resources:
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
-# CHECK-NEXT: 1.00 2.00 - - - - - - 1.00 - - - - -
+# CHECK-NEXT: 1.49 1.51 - - - - - - 1.00 - - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
-# CHECK-NEXT: - 1.00 - - - - - - - - - - - - leal 42(%rdi), %eax
+# CHECK-NEXT: 0.99 0.01 - - - - - - - - - - - - leal 42(%rdi), %eax
# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imull %esi, %eax
-# CHECK-NEXT: 1.00 - - - - - - - - - - - - - leal 42(%rdi), %eax
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - leal 42(%rdi), %eax
# CHECK: [1] Code Region - inner
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 100
# CHECK-NEXT: Total Cycles: 303
-# CHECK-NEXT: Total uOps: 200
+# CHECK-NEXT: Total uOps: 100
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.66
+# CHECK-NEXT: uOps Per Cycle: 0.33
# CHECK-NEXT: IPC: 0.33
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 3 1.00 imull %esi, %eax
+# CHECK-NEXT: 1 3 1.00 imull %esi, %eax
# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0