From: Simon Pilgrim Date: Tue, 22 Jan 2019 13:27:18 +0000 (+0000) Subject: [X86][BtVer2] SSE2 vector shifts have local forwarding disabled X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=cf74016e108659df02f081c8f876441d15e78d19;p=llvm [X86][BtVer2] SSE2 vector shifts have local forwarding disabled Similar to horizontal ops in D56777, the SSE2 (but not MMX) bit shift ops have local forwarding disabled, adding +1cy to the use latency for the result. Differential Revision: https://reviews.llvm.org/D57026 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@351817 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td index cc997522db9..8d8de3e8e15 100644 --- a/lib/Target/X86/X86ScheduleBtVer2.td +++ b/lib/Target/X86/X86ScheduleBtVer2.td @@ -487,11 +487,11 @@ defm : JWriteResFpuPair; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; -defm : JWriteResFpuPair; +defm : JWriteResFpuPair; // +1cy latency. defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : JWriteResFpuPair; -defm : JWriteResFpuPair; +defm : JWriteResFpuPair; // +1cy latency. 
defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteResPairUnsupported; diff --git a/test/CodeGen/X86/sse2-schedule.ll b/test/CodeGen/X86/sse2-schedule.ll index 695359d0823..c40abf8cb5b 100644 --- a/test/CodeGen/X86/sse2-schedule.ll +++ b/test/CodeGen/X86/sse2-schedule.ll @@ -12146,16 +12146,16 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; BTVER2-SSE-LABEL: test_pslld: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:0.50] +; BTVER2-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: pslld $2, %xmm0 # sched: [2:0.50] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_pslld: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50] +; BTVER2-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BTVER2-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BTVER2-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [2:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_pslld: @@ -12393,16 +12393,16 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; ; BTVER2-SSE-LABEL: test_psllq: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:0.50] +; BTVER2-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: psllq $2, %xmm0 # sched: [2:0.50] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_psllq: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: 
vpsllq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50] +; BTVER2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BTVER2-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BTVER2-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [2:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_psllq: @@ -12535,16 +12535,16 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; BTVER2-SSE-LABEL: test_psllw: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:0.50] +; BTVER2-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: psllw $2, %xmm0 # sched: [2:0.50] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_psllw: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50] +; BTVER2-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BTVER2-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BTVER2-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [2:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_psllw: @@ -12677,16 +12677,16 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; BTVER2-SSE-LABEL: test_psrad: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:0.50] +; BTVER2-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: psrad $2, %xmm0 # sched: [2:0.50] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_psrad: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsrad %xmm1, 
%xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50] +; BTVER2-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BTVER2-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BTVER2-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [2:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_psrad: @@ -12819,16 +12819,16 @@ define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; BTVER2-SSE-LABEL: test_psraw: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:0.50] +; BTVER2-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: psraw $2, %xmm0 # sched: [2:0.50] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_psraw: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50] +; BTVER2-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BTVER2-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BTVER2-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [2:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_psraw: @@ -12961,16 +12961,16 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; BTVER2-SSE-LABEL: test_psrld: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:0.50] +; BTVER2-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: psrld $2, %xmm0 # sched: [2:0.50] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_psrld: ; 
BTVER2: # %bb.0: -; BTVER2-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50] +; BTVER2-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BTVER2-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BTVER2-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [2:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_psrld: @@ -13208,16 +13208,16 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; ; BTVER2-SSE-LABEL: test_psrlq: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:0.50] +; BTVER2-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: psrlq $2, %xmm0 # sched: [2:0.50] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_psrlq: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50] +; BTVER2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BTVER2-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BTVER2-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [2:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_psrlq: @@ -13350,16 +13350,16 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; BTVER2-SSE-LABEL: test_psrlw: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:0.50] +; BTVER2-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: psrlw $2, %xmm0 # sched: [2:0.50] ; BTVER2-SSE-NEXT: retq # 
sched: [4:1.00] ; ; BTVER2-LABEL: test_psrlw: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50] +; BTVER2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BTVER2-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BTVER2-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [2:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_psrlw: diff --git a/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s b/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s index e893dd81569..a7d861392f2 100644 --- a/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s +++ b/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s @@ -1562,32 +1562,32 @@ vzeroupper # CHECK-NEXT: 1 6 1.00 * vpsignd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.50 vpsignw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 6 1.00 * vpsignw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpslld $1, %xmm0, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpslld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 6 1.00 * vpslld (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 2 0.50 vpslld $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 2 0.50 vpslld %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 7 1.00 * vpslld (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.50 vpslldq $1, %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpsllq $1, %xmm0, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpsllq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 6 1.00 * vpsllq (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpsllw $1, %xmm0, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpsllw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 6 1.00 * vpsllw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpsrad $1, %xmm0, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpsrad %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 6 1.00 * vpsrad (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpsraw $1, %xmm0, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpsraw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 6 1.00 * vpsraw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpsrld $1, %xmm0, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpsrld 
%xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 6 1.00 * vpsrld (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 2 0.50 vpsllq $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 2 0.50 vpsllq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 7 1.00 * vpsllq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 2 0.50 vpsllw $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 2 0.50 vpsllw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 7 1.00 * vpsllw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 2 0.50 vpsrad $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 2 0.50 vpsrad %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 7 1.00 * vpsrad (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 2 0.50 vpsraw $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 2 0.50 vpsraw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 7 1.00 * vpsraw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 2 0.50 vpsrld $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 2 0.50 vpsrld %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 7 1.00 * vpsrld (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.50 vpsrldq $1, %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpsrlq $1, %xmm0, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpsrlq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 6 1.00 * vpsrlq (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpsrlw $1, %xmm0, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpsrlw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 6 1.00 * vpsrlw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 2 0.50 vpsrlq $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 2 0.50 vpsrlq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 7 1.00 * vpsrlq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 2 0.50 vpsrlw $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 2 0.50 vpsrlw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 7 1.00 * vpsrlw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.50 vpsubb %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 6 1.00 * vpsubb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.50 vpsubd %xmm0, %xmm1, %xmm2 diff --git a/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s b/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s index 6254f95c095..d22bc447400 100644 --- a/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s +++ b/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s @@ -586,32 +586,32 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 6 1.00 * pshufhw $1, (%rax), 
%xmm2 # CHECK-NEXT: 1 1 0.50 pshuflw $1, %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * pshuflw $1, (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 pslld $1, %xmm2 -# CHECK-NEXT: 1 1 0.50 pslld %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * pslld (%rax), %xmm2 +# CHECK-NEXT: 1 2 0.50 pslld $1, %xmm2 +# CHECK-NEXT: 1 2 0.50 pslld %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * pslld (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 pslldq $1, %xmm2 -# CHECK-NEXT: 1 1 0.50 psllq $1, %xmm2 -# CHECK-NEXT: 1 1 0.50 psllq %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * psllq (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 psllw $1, %xmm2 -# CHECK-NEXT: 1 1 0.50 psllw %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * psllw (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 psrad $1, %xmm2 -# CHECK-NEXT: 1 1 0.50 psrad %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * psrad (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 psraw $1, %xmm2 -# CHECK-NEXT: 1 1 0.50 psraw %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * psraw (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 psrld $1, %xmm2 -# CHECK-NEXT: 1 1 0.50 psrld %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * psrld (%rax), %xmm2 +# CHECK-NEXT: 1 2 0.50 psllq $1, %xmm2 +# CHECK-NEXT: 1 2 0.50 psllq %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * psllq (%rax), %xmm2 +# CHECK-NEXT: 1 2 0.50 psllw $1, %xmm2 +# CHECK-NEXT: 1 2 0.50 psllw %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * psllw (%rax), %xmm2 +# CHECK-NEXT: 1 2 0.50 psrad $1, %xmm2 +# CHECK-NEXT: 1 2 0.50 psrad %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * psrad (%rax), %xmm2 +# CHECK-NEXT: 1 2 0.50 psraw $1, %xmm2 +# CHECK-NEXT: 1 2 0.50 psraw %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * psraw (%rax), %xmm2 +# CHECK-NEXT: 1 2 0.50 psrld $1, %xmm2 +# CHECK-NEXT: 1 2 0.50 psrld %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * psrld (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 psrldq $1, %xmm2 -# CHECK-NEXT: 1 1 0.50 psrlq $1, %xmm2 -# CHECK-NEXT: 1 1 0.50 psrlq %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * psrlq (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 psrlw $1, %xmm2 -# CHECK-NEXT: 1 1 0.50 psrlw %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * psrlw (%rax), %xmm2 +# 
CHECK-NEXT: 1 2 0.50 psrlq $1, %xmm2 +# CHECK-NEXT: 1 2 0.50 psrlq %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * psrlq (%rax), %xmm2 +# CHECK-NEXT: 1 2 0.50 psrlw $1, %xmm2 +# CHECK-NEXT: 1 2 0.50 psrlw %xmm0, %xmm2 +# CHECK-NEXT: 1 7 1.00 * psrlw (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 psubb %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * psubb (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 psubd %xmm0, %xmm2