From: Simon Pilgrim Date: Tue, 22 Jan 2019 13:13:57 +0000 (+0000) Subject: [X86][BtVer2] X86ISD::VPERMILPV has local forwarding disabled X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=928b0fdf96b471c73880fbdb28dd12a38db10e6a;p=llvm [X86][BtVer2] X86ISD::VPERMILPV has local forwarding disabled Similar to horizontal ops on D56777, the vpermilpd/vpermilps variable mask ops have local forwarding disabled, adding +1cy to the use latency for the result. Differential Revision: https://reviews.llvm.org/D57022 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@351815 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td index e81f8445b3e..cc997522db9 100644 --- a/lib/Target/X86/X86ScheduleBtVer2.td +++ b/lib/Target/X86/X86ScheduleBtVer2.td @@ -400,8 +400,8 @@ defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResYMMPair; defm : X86WriteResPairUnsupported; -defm : JWriteResFpuPair; -defm : JWriteResYMMPair; +defm : JWriteResFpuPair; // +1cy latency. +defm : JWriteResYMMPair; // +1cy latency. 
defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; defm : JWriteResYMMPair; diff --git a/test/CodeGen/X86/avx-schedule.ll b/test/CodeGen/X86/avx-schedule.ll index 6818ea06732..caf4c979e2e 100644 --- a/test/CodeGen/X86/avx-schedule.ll +++ b/test/CodeGen/X86/avx-schedule.ll @@ -4258,8 +4258,8 @@ define <2 x double> @test_permilvarpd(<2 x double> %a0, <2 x i64> %a1, <2 x i64> ; ; BTVER2-LABEL: test_permilvarpd: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; BTVER2-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:2.00] +; BTVER2-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; BTVER2-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [8:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_permilvarpd: @@ -4319,8 +4319,8 @@ define <4 x double> @test_permilvarpd_ymm(<4 x double> %a0, <4 x i64> %a1, <4 x ; ; BTVER2-LABEL: test_permilvarpd_ymm: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [3:3.00] -; BTVER2-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:3.00] +; BTVER2-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [4:3.00] +; BTVER2-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [9:3.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_permilvarpd_ymm: @@ -4380,8 +4380,8 @@ define <4 x float> @test_permilvarps(<4 x float> %a0, <4 x i32> %a1, <4 x i32> * ; ; BTVER2-LABEL: test_permilvarps: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; BTVER2-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:2.00] +; BTVER2-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; BTVER2-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [8:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_permilvarps: @@ -4441,8 +4441,8 @@ define <8 x float> @test_permilvarps_ymm(<8 x float> %a0, <8 x i32> %a1, <8 x i3 ; ; BTVER2-LABEL: test_permilvarps_ymm: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [3:3.00] -; 
BTVER2-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:3.00] +; BTVER2-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [4:3.00] +; BTVER2-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [9:3.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_permilvarps_ymm: diff --git a/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s b/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s index 36507b37aeb..e893dd81569 100644 --- a/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s +++ b/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s @@ -1433,20 +1433,20 @@ vzeroupper # CHECK-NEXT: 2 6 1.00 * vperm2f128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.50 vpermilpd $1, %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * vpermilpd $1, (%rax), %xmm2 -# CHECK-NEXT: 3 2 2.00 vpermilpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 3 7 2.00 * vpermilpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 3 3 2.00 vpermilpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 3 8 2.00 * vpermilpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 2 1 1.00 vpermilpd $1, %ymm0, %ymm2 # CHECK-NEXT: 2 6 2.00 * vpermilpd $1, (%rax), %ymm2 -# CHECK-NEXT: 6 3 3.00 vpermilpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 6 8 3.00 * vpermilpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 6 4 3.00 vpermilpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 6 9 3.00 * vpermilpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.50 vpermilps $1, %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * vpermilps $1, (%rax), %xmm2 -# CHECK-NEXT: 3 2 2.00 vpermilps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 3 7 2.00 * vpermilps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 3 3 2.00 vpermilps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 3 8 2.00 * vpermilps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 2 1 1.00 vpermilps $1, %ymm0, %ymm2 # CHECK-NEXT: 2 6 2.00 * vpermilps $1, (%rax), %ymm2 -# CHECK-NEXT: 6 3 3.00 vpermilps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 6 8 3.00 * vpermilps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 6 4 3.00 vpermilps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 6 9 3.00 * vpermilps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 3 1.00 vpextrb $1, %xmm0, %ecx # CHECK-NEXT: 1 3 1.00 * 
vpextrb $1, %xmm0, (%rax) # CHECK-NEXT: 1 3 1.00 vpextrd $1, %xmm0, %ecx