From 66b83ff99db7428d70deb131b22398e37a1e463c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 21 Jan 2019 18:04:25 +0000 Subject: [PATCH] [X86][BtVer2] Update latency of mmx horizontal operations D56777 added a +1cy local forwarding penalty for horizontal operations, but this penalty only affects the sse2/xmm variants; the mmx variants don't suffer the penalty. Confirmed with @andreadb. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@351755 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ScheduleBtVer2.td | 2 +- test/CodeGen/X86/mmx-schedule.ll | 24 +++++++++---------- .../llvm-mca/X86/BtVer2/resources-ssse3.s | 24 +++++++++---------- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td index f54e52ffa46..e81f8445b3e 100644 --- a/lib/Target/X86/X86ScheduleBtVer2.td +++ b/lib/Target/X86/X86ScheduleBtVer2.td @@ -577,7 +577,7 @@ defm : JWriteResFpuPair; // +1cy latency. defm : JWriteResYMMPair; // +1cy latency. -defm : JWriteResFpuPair; // +1cy latency. +defm : JWriteResFpuPair; defm : JWriteResFpuPair; // +1cy latency. 
defm : X86WriteResPairUnsupported; diff --git a/test/CodeGen/X86/mmx-schedule.ll b/test/CodeGen/X86/mmx-schedule.ll index 513332f61f1..51dc5e102ff 100644 --- a/test/CodeGen/X86/mmx-schedule.ll +++ b/test/CodeGen/X86/mmx-schedule.ll @@ -3368,8 +3368,8 @@ define i64 @test_phaddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; ; BTVER2-LABEL: test_phaddd: ; BTVER2: # %bb.0: -; BTVER2-NEXT: phaddd %mm1, %mm0 # sched: [2:0.50] -; BTVER2-NEXT: phaddd (%rdi), %mm0 # sched: [7:1.00] +; BTVER2-NEXT: phaddd %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: phaddd (%rdi), %mm0 # sched: [6:1.00] ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -3453,8 +3453,8 @@ define i64 @test_phaddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; ; BTVER2-LABEL: test_phaddsw: ; BTVER2: # %bb.0: -; BTVER2-NEXT: phaddsw %mm1, %mm0 # sched: [2:0.50] -; BTVER2-NEXT: phaddsw (%rdi), %mm0 # sched: [7:1.00] +; BTVER2-NEXT: phaddsw %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: phaddsw (%rdi), %mm0 # sched: [6:1.00] ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -3538,8 +3538,8 @@ define i64 @test_phaddw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; ; BTVER2-LABEL: test_phaddw: ; BTVER2: # %bb.0: -; BTVER2-NEXT: phaddw %mm1, %mm0 # sched: [2:0.50] -; BTVER2-NEXT: phaddw (%rdi), %mm0 # sched: [7:1.00] +; BTVER2-NEXT: phaddw %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: phaddw (%rdi), %mm0 # sched: [6:1.00] ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -3623,8 +3623,8 @@ define i64 @test_phsubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; ; BTVER2-LABEL: test_phsubd: ; BTVER2: # %bb.0: -; BTVER2-NEXT: phsubd %mm1, %mm0 # sched: [2:0.50] -; BTVER2-NEXT: phsubd (%rdi), %mm0 # sched: [7:1.00] +; BTVER2-NEXT: phsubd %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: phsubd (%rdi), %mm0 # sched: [6:1.00] ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; 
BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -3708,8 +3708,8 @@ define i64 @test_phsubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; ; BTVER2-LABEL: test_phsubsw: ; BTVER2: # %bb.0: -; BTVER2-NEXT: phsubsw %mm1, %mm0 # sched: [2:0.50] -; BTVER2-NEXT: phsubsw (%rdi), %mm0 # sched: [7:1.00] +; BTVER2-NEXT: phsubsw %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: phsubsw (%rdi), %mm0 # sched: [6:1.00] ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -3793,8 +3793,8 @@ define i64 @test_phsubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; ; BTVER2-LABEL: test_phsubw: ; BTVER2: # %bb.0: -; BTVER2-NEXT: phsubw %mm1, %mm0 # sched: [2:0.50] -; BTVER2-NEXT: phsubw (%rdi), %mm0 # sched: [7:1.00] +; BTVER2-NEXT: phsubw %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: phsubw (%rdi), %mm0 # sched: [6:1.00] ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; diff --git a/test/tools/llvm-mca/X86/BtVer2/resources-ssse3.s b/test/tools/llvm-mca/X86/BtVer2/resources-ssse3.s index 0b64d1da48b..e35a4745149 100644 --- a/test/tools/llvm-mca/X86/BtVer2/resources-ssse3.s +++ b/test/tools/llvm-mca/X86/BtVer2/resources-ssse3.s @@ -122,28 +122,28 @@ psignw (%rax), %xmm2 # CHECK-NEXT: 1 6 1.00 * palignr $1, (%rax), %mm2 # CHECK-NEXT: 1 1 0.50 palignr $1, %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * palignr $1, (%rax), %xmm2 -# CHECK-NEXT: 1 2 0.50 phaddd %mm0, %mm2 -# CHECK-NEXT: 1 7 1.00 * phaddd (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 phaddd %mm0, %mm2 +# CHECK-NEXT: 1 6 1.00 * phaddd (%rax), %mm2 # CHECK-NEXT: 1 2 0.50 phaddd %xmm0, %xmm2 # CHECK-NEXT: 1 7 1.00 * phaddd (%rax), %xmm2 -# CHECK-NEXT: 1 2 0.50 phaddsw %mm0, %mm2 -# CHECK-NEXT: 1 7 1.00 * phaddsw (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 phaddsw %mm0, %mm2 +# CHECK-NEXT: 1 6 1.00 * phaddsw (%rax), %mm2 # CHECK-NEXT: 1 2 0.50 phaddsw %xmm0, %xmm2 # CHECK-NEXT: 1 7 1.00 * phaddsw (%rax), %xmm2 -# CHECK-NEXT: 1 2 0.50 phaddw %mm0, %mm2 -# CHECK-NEXT: 1 7 1.00 * 
phaddw (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 phaddw %mm0, %mm2 +# CHECK-NEXT: 1 6 1.00 * phaddw (%rax), %mm2 # CHECK-NEXT: 1 2 0.50 phaddw %xmm0, %xmm2 # CHECK-NEXT: 1 7 1.00 * phaddw (%rax), %xmm2 -# CHECK-NEXT: 1 2 0.50 phsubd %mm0, %mm2 -# CHECK-NEXT: 1 7 1.00 * phsubd (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 phsubd %mm0, %mm2 +# CHECK-NEXT: 1 6 1.00 * phsubd (%rax), %mm2 # CHECK-NEXT: 1 2 0.50 phsubd %xmm0, %xmm2 # CHECK-NEXT: 1 7 1.00 * phsubd (%rax), %xmm2 -# CHECK-NEXT: 1 2 0.50 phsubsw %mm0, %mm2 -# CHECK-NEXT: 1 7 1.00 * phsubsw (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 phsubsw %mm0, %mm2 +# CHECK-NEXT: 1 6 1.00 * phsubsw (%rax), %mm2 # CHECK-NEXT: 1 2 0.50 phsubsw %xmm0, %xmm2 # CHECK-NEXT: 1 7 1.00 * phsubsw (%rax), %xmm2 -# CHECK-NEXT: 1 2 0.50 phsubw %mm0, %mm2 -# CHECK-NEXT: 1 7 1.00 * phsubw (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 phsubw %mm0, %mm2 +# CHECK-NEXT: 1 6 1.00 * phsubw (%rax), %mm2 # CHECK-NEXT: 1 2 0.50 phsubw %xmm0, %xmm2 # CHECK-NEXT: 1 7 1.00 * phsubw (%rax), %xmm2 # CHECK-NEXT: 1 2 1.00 pmaddubsw %mm0, %mm2 -- 2.50.1