From 139a55f53b96efb74c5746942ff27df712dd08f0 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 6 Oct 2017 13:40:29 +0000 Subject: [PATCH] [X86][SSE] Add SKX cpu tests to SSE/AVX scheduling tests (D38443) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315061 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/avx-schedule.ll | 577 ++++++++++++++++ test/CodeGen/X86/avx2-schedule.ll | 1009 ++++++++++++++++++++++++++++ test/CodeGen/X86/fma-schedule.ll | 193 ++++++ test/CodeGen/X86/sse-schedule.ll | 327 +++++++++ test/CodeGen/X86/sse2-schedule.ll | 830 +++++++++++++++++++++++ test/CodeGen/X86/sse3-schedule.ll | 77 +++ test/CodeGen/X86/sse41-schedule.ll | 313 +++++++++ test/CodeGen/X86/sse42-schedule.ll | 88 +++ test/CodeGen/X86/ssse3-schedule.ll | 100 +++ 9 files changed, 3514 insertions(+) diff --git a/test/CodeGen/X86/avx-schedule.ll b/test/CodeGen/X86/avx-schedule.ll index 12d41bc25e6..c79a711dd30 100644 --- a/test/CodeGen/X86/avx-schedule.ll +++ b/test/CodeGen/X86/avx-schedule.ll @@ -4,6 +4,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 @@ -32,6 +33,12 @@ define <4 x double> @test_addpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SKYLAKE-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_addpd: +; SKX: # BB#0: +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_addpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] @@ -74,6 +81,12 @@ define <8 x float> @test_addps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SKYLAKE-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_addps: +; SKX: # BB#0: +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_addps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] @@ -116,6 +129,12 @@ define <4 x double> @test_addsubpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; SKYLAKE-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_addsubpd: +; SKX: # BB#0: +; SKX-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_addsubpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] @@ -159,6 +178,12 @@ define <8 x float> @test_addsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> ; SKYLAKE-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_addsubps: +; SKX: # BB#0: +; SKX-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_addsubps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] @@ -206,6 +231,13 @@ define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_andnotpd: +; SKX: # BB#0: +; SKX-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_andnotpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -261,6 +293,13 @@ define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float> ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_andnotps: +; SKX: # BB#0: +; SKX-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_andnotps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -316,6 +355,13 @@ define <4 x double> @test_andpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_andpd: +; SKX: # BB#0: +; SKX-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_andpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -369,6 +415,13 @@ define <8 x float> @test_andps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_andps: +; SKX: # BB#0: +; SKX-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_andps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -422,6 +475,13 @@ define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x doubl ; SKYLAKE-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_blendpd: +; SKX: # BB#0: +; SKX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50] +; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_blendpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50] @@ -467,6 +527,12 @@ define <8 x float> @test_blendps(<8 x float> %a0, <8 x float> %a1, <8 x float> * ; SKYLAKE-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_blendps: +; SKX: # BB#0: +; SKX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50] +; SKX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_blendps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50] @@ -509,6 +575,12 @@ define <4 x double> @test_blendvpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; SKYLAKE-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:0.67] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_blendvpd: +; SKX: # BB#0: +; SKX-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67] +; SKX-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:0.67] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_blendvpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00] @@ -552,6 +624,12 @@ define <8 x float> @test_blendvps(<8 x float> %a0, <8 x float> %a1, <8 x float> ; SKYLAKE-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:0.67] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_blendvps: +; SKX: # BB#0: +; SKX-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67] +; SKX-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:0.67] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_blendvps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00] @@ -591,6 +669,11 @@ define <8 x float> @test_broadcastf128(<4 x float> *%a0) { ; SKYLAKE-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_broadcastf128: +; SKX: # BB#0: +; SKX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_broadcastf128: ; BTVER2: # BB#0: ; BTVER2-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [6:1.00] @@ -626,6 +709,11 @@ define <4 x double> @test_broadcastsd_ymm(double *%a0) { ; SKYLAKE-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_broadcastsd_ymm: +; SKX: # BB#0: +; SKX-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_broadcastsd_ymm: ; BTVER2: # BB#0: ; BTVER2-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [6:1.00] @@ -662,6 +750,11 @@ define <4 x float> @test_broadcastss(float *%a0) { ; SKYLAKE-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_broadcastss: +; SKX: # BB#0: +; SKX-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_broadcastss: ; BTVER2: # BB#0: ; BTVER2-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [5:1.00] @@ -698,6 +791,11 @@ define <8 x float> @test_broadcastss_ymm(float *%a0) { ; SKYLAKE-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_broadcastss_ymm: +; SKX: # BB#0: +; SKX-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_broadcastss_ymm: ; BTVER2: # BB#0: ; BTVER2-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [6:1.00] @@ -742,6 +840,15 @@ define <4 x double> @test_cmppd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SKYLAKE-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cmppd: +; SKX: # BB#0: +; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 +; SKX-NEXT: vcmpeqpd (%rdi), %ymm0, %k1 +; SKX-NEXT: vpmovm2q %k0, %ymm0 +; SKX-NEXT: vpmovm2q %k1, %ymm1 +; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cmppd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] @@ -794,6 +901,15 @@ define <8 x float> @test_cmpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SKYLAKE-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cmpps: +; SKX: # BB#0: +; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 +; SKX-NEXT: vcmpeqps (%rdi), %ymm0, %k1 +; SKX-NEXT: vpmovm2d %k0, %ymm0 +; SKX-NEXT: vpmovm2d %k1, %ymm1 +; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cmpps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] @@ -846,6 +962,13 @@ define <4 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvtdq2pd: +; SKX: # BB#0: +; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00] +; SKX-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [7:1.00] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvtdq2pd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [8:1.00] @@ -897,6 +1020,13 @@ define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) { ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvtdq2ps: +; SKX: # BB#0: +; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [4:0.50] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvtdq2ps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [8:1.00] @@ -946,6 +1076,13 @@ define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) { ; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvtpd2dq: +; SKX: # BB#0: +; SKX-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00] +; SKX-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00] +; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvtpd2dq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00] @@ -995,6 +1132,13 @@ define <8 x float> @test_cvtpd2ps(<4 x double> %a0, <4 x double> *%a1) { ; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvtpd2ps: +; SKX: # BB#0: +; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00] +; SKX-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00] +; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvtpd2ps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00] @@ -1044,6 +1188,13 @@ define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) { ; SKYLAKE-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvtps2dq: +; SKX: # BB#0: +; SKX-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [4:0.50] +; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvtps2dq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [8:1.00] @@ -1089,6 +1240,12 @@ define <4 x double> @test_divpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SKYLAKE-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [14:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_divpd: +; SKX: # BB#0: +; SKX-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [14:1.00] +; SKX-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [14:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_divpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [38:38.00] @@ -1131,6 +1288,12 @@ define <8 x float> @test_divps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SKYLAKE-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [11:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_divps: +; SKX: # BB#0: +; SKX-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [11:1.00] +; SKX-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [11:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_divps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [38:38.00] @@ -1173,6 +1336,12 @@ define <8 x float> @test_dpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2 ; SKYLAKE-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [13:1.33] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_dpps: +; SKX: # BB#0: +; SKX-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [13:1.33] +; SKX-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [13:1.33] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_dpps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [3:1.00] @@ -1220,6 +1389,13 @@ define <4 x float> @test_extractf128(<8 x float> %a0, <8 x float> %a1, <4 x floa ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_extractf128: +; SKX: # BB#0: +; SKX-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00] +; SKX-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] +; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_extractf128: ; BTVER2: # BB#0: ; BTVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:0.50] @@ -1263,6 +1439,12 @@ define <4 x double> @test_haddpd(<4 x double> %a0, <4 x double> %a1, <4 x double ; SKYLAKE-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_haddpd: +; SKX: # BB#0: +; SKX-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00] +; SKX-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_haddpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] @@ -1306,6 +1488,12 @@ define <8 x float> @test_haddps(<8 x float> %a0, <8 x float> %a1, <8 x float> *% ; SKYLAKE-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [6:2.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_haddps: +; SKX: # BB#0: +; SKX-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [6:2.00] +; SKX-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [6:2.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_haddps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] @@ -1349,6 +1537,12 @@ define <4 x double> @test_hsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double ; SKYLAKE-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_hsubpd: +; SKX: # BB#0: +; SKX-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00] +; SKX-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_hsubpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] @@ -1392,6 +1586,12 @@ define <8 x float> @test_hsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *% ; SKYLAKE-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [6:2.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_hsubps: +; SKX: # BB#0: +; SKX-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [6:2.00] +; SKX-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [6:2.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_hsubps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] @@ -1439,6 +1639,13 @@ define <8 x float> @test_insertf128(<8 x float> %a0, <4 x float> %a1, <4 x float ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_insertf128: +; SKX: # BB#0: +; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] +; SKX-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_insertf128: ; BTVER2: # BB#0: ; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:0.50] @@ -1482,6 +1689,11 @@ define <32 x i8> @test_lddqu(i8* %a0) { ; SKYLAKE-NEXT: vlddqu (%rdi), %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_lddqu: +; SKX: # BB#0: +; SKX-NEXT: vlddqu (%rdi), %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_lddqu: ; BTVER2: # BB#0: ; BTVER2-NEXT: vlddqu (%rdi), %ymm0 # sched: [5:1.00] @@ -1525,6 +1737,13 @@ define <2 x double> @test_maskmovpd(i8* %a0, <2 x i64> %a1, <2 x double> %a2) { ; SKYLAKE-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_maskmovpd: +; SKX: # BB#0: +; SKX-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [1:0.50] +; SKX-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_maskmovpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 @@ -1574,6 +1793,13 @@ define <4 x double> @test_maskmovpd_ymm(i8* %a0, <4 x i64> %a1, <4 x double> %a2 ; SKYLAKE-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_maskmovpd_ymm: +; SKX: # BB#0: +; SKX-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [1:0.50] +; SKX-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_maskmovpd_ymm: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 @@ -1623,6 +1849,13 @@ define <4 x float> @test_maskmovps(i8* %a0, <4 x i32> %a1, <4 x float> %a2) { ; SKYLAKE-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_maskmovps: +; SKX: # BB#0: +; SKX-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [1:0.50] +; SKX-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_maskmovps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 @@ -1672,6 +1905,13 @@ define <8 x float> @test_maskmovps_ymm(i8* %a0, <8 x i32> %a1, <8 x float> %a2) ; SKYLAKE-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_maskmovps_ymm: +; SKX: # BB#0: +; SKX-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [1:0.50] +; SKX-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_maskmovps_ymm: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 @@ -1717,6 +1957,12 @@ define <4 x double> @test_maxpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SKYLAKE-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_maxpd: +; SKX: # BB#0: +; SKX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; SKX-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_maxpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] @@ -1760,6 +2006,12 @@ define <8 x float> @test_maxps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SKYLAKE-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_maxps: +; SKX: # BB#0: +; SKX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; SKX-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_maxps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] @@ -1803,6 +2055,12 @@ define <4 x double> @test_minpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SKYLAKE-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_minpd: +; SKX: # BB#0: +; SKX-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; SKX-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_minpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] @@ -1846,6 +2104,12 @@ define <8 x float> @test_minps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SKYLAKE-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_minps: +; SKX: # BB#0: +; SKX-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; SKX-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_minps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] @@ -1893,6 +2157,13 @@ define <4 x double> @test_movapd(<4 x double> *%a0, <4 x double> *%a1) { ; SKYLAKE-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movapd: +; SKX: # BB#0: +; SKX-NEXT: vmovapd (%rdi), %ymm0 # sched: [1:0.50] +; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movapd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovapd (%rdi), %ymm0 # sched: [5:1.00] @@ -1941,6 +2212,13 @@ define <8 x float> @test_movaps(<8 x float> *%a0, <8 x float> *%a1) { ; SKYLAKE-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movaps: +; SKX: # BB#0: +; SKX-NEXT: vmovaps (%rdi), %ymm0 # sched: [1:0.50] +; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movaps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovaps (%rdi), %ymm0 # sched: [5:1.00] @@ -1989,6 +2267,13 @@ define <4 x double> @test_movddup(<4 x double> %a0, <4 x double> *%a1) { ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movddup: +; SKX: # BB#0: +; SKX-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] +; SKX-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [1:0.50] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movddup: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [5:1.00] @@ -2034,6 +2319,12 @@ define i32 @test_movmskpd(<4 x double> %a0) { ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movmskpd: +; SKX: # BB#0: +; SKX-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00] +; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movmskpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovmskpd %ymm0, %eax # sched: [1:0.50] @@ -2074,6 +2365,12 @@ define i32 @test_movmskps(<8 x float> %a0) { ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movmskps: +; SKX: # BB#0: +; SKX-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00] +; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movmskps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovmskps %ymm0, %eax # sched: [1:0.50] @@ -2114,6 +2411,12 @@ define <4 x double> @test_movntpd(<4 x double> %a0, <4 x double> *%a1) { ; SKYLAKE-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movntpd: +; SKX: # BB#0: +; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movntpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:2.00] @@ -2155,6 +2458,12 @@ define <8 x float> @test_movntps(<8 x float> %a0, <8 x float> *%a1) { ; SKYLAKE-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movntps: +; SKX: # BB#0: +; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movntps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:2.00] @@ -2200,6 +2509,13 @@ define <8 x float> @test_movshdup(<8 x float> %a0, <8 x float> *%a1) { ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movshdup: +; SKX: # BB#0: +; SKX-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] +; SKX-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [1:0.50] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movshdup: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [5:1.00] @@ -2249,6 +2565,13 @@ define <8 x float> @test_movsldup(<8 x float> %a0, <8 x float> *%a1) { ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movsldup: +; SKX: # BB#0: +; SKX-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] +; SKX-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [1:0.50] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movsldup: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [5:1.00] @@ -2300,6 +2623,13 @@ define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) { ; SKYLAKE-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movupd: +; SKX: # BB#0: +; SKX-NEXT: vmovupd (%rdi), %ymm0 # sched: [1:0.50] +; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movupd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovupd (%rdi), %ymm0 # sched: [5:1.00] @@ -2350,6 +2680,13 @@ define <8 x float> @test_movups(<8 x float> *%a0, <8 x float> *%a1) { ; SKYLAKE-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movups: +; SKX: # BB#0: +; SKX-NEXT: vmovups (%rdi), %ymm0 # sched: [1:0.50] +; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movups: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovups (%rdi), %ymm0 # sched: [5:1.00] @@ -2394,6 +2731,12 @@ define <4 x double> @test_mulpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SKYLAKE-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_mulpd: +; SKX: # BB#0: +; SKX-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_mulpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:4.00] @@ -2436,6 +2779,12 @@ define <8 x float> @test_mulps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SKYLAKE-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_mulps: +; SKX: # BB#0: +; SKX-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_mulps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00] @@ -2482,6 +2831,13 @@ define <4 x double> @orpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: orpd: +; SKX: # BB#0: +; SKX-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: orpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -2535,6 +2891,13 @@ define <8 x float> @test_orps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2 ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_orps: +; SKX: # BB#0: +; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_orps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -2588,6 +2951,13 @@ define <4 x double> @test_perm2f128(<4 x double> %a0, <4 x double> %a1, <4 x dou ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_perm2f128: +; SKX: # BB#0: +; SKX-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] +; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_perm2f128: ; BTVER2: # BB#0: ; BTVER2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:0.50] @@ -2637,6 +3007,13 @@ define <2 x double> @test_permilpd(<2 x double> %a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_permilpd: +; SKX: # BB#0: +; SKX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] +; SKX-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [1:1.00] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_permilpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [6:1.00] @@ -2686,6 +3063,13 @@ define <4 x double> @test_permilpd_ymm(<4 x double> %a0, <4 x double> *%a1) { ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_permilpd_ymm: +; SKX: # BB#0: +; SKX-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] +; SKX-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [1:1.00] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_permilpd_ymm: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [6:1.00] @@ -2735,6 +3119,13 @@ define <4 x float> @test_permilps(<4 x float> %a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_permilps: +; SKX: # BB#0: +; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] +; SKX-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [1:1.00] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_permilps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00] @@ -2784,6 +3175,13 @@ define <8 x float> @test_permilps_ymm(<8 x float> %a0, <8 x float> *%a1) { ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_permilps_ymm: +; SKX: # BB#0: +; SKX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] +; SKX-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [1:1.00] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_permilps_ymm: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [6:1.00] @@ -2829,6 +3227,12 @@ define <2 x double> @test_permilvarpd(<2 x double> %a0, <2 x i64> %a1, <2 x i64> ; SKYLAKE-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_permilvarpd: +; SKX: # BB#0: +; SKX-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_permilvarpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -2872,6 +3276,12 @@ define <4 x double> @test_permilvarpd_ymm(<4 x double> %a0, <4 x i64> %a1, <4 x ; SKYLAKE-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_permilvarpd_ymm: +; SKX: # BB#0: +; SKX-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_permilvarpd_ymm: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -2915,6 +3325,12 @@ define <4 x float> @test_permilvarps(<4 x float> %a0, <4 x i32> %a1, <4 x i32> * ; SKYLAKE-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_permilvarps: +; SKX: # BB#0: +; SKX-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_permilvarps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -2958,6 +3374,12 @@ define <8 x float> @test_permilvarps_ymm(<8 x float> %a0, <8 x i32> %a1, <8 x i3 ; SKYLAKE-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_permilvarps_ymm: +; SKX: # BB#0: +; SKX-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_permilvarps_ymm: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -3005,6 +3427,13 @@ define <8 x float> @test_rcpps(<8 x float> %a0, <8 x float> *%a1) { ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_rcpps: +; SKX: # BB#0: +; SKX-NEXT: vrcp14ps %ymm0, %ymm0 +; SKX-NEXT: vrcp14ps (%rdi), %ymm1 +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_rcpps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vrcpps (%rdi), %ymm1 # sched: [7:2.00] @@ -3055,6 +3484,13 @@ define <4 x double> @test_roundpd(<4 x double> %a0, <4 x double> *%a1) { ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_roundpd: +; SKX: # BB#0: +; SKX-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [8:0.67] +; SKX-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [8:0.67] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_roundpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [8:1.00] @@ -3105,6 +3541,13 @@ define <8 x float> @test_roundps(<8 x float> %a0, <8 x float> *%a1) { ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_roundps: +; SKX: # BB#0: +; SKX-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [8:0.67] +; SKX-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [8:0.67] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_roundps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [8:1.00] @@ -3155,6 +3598,13 @@ define <8 x float> @test_rsqrtps(<8 x float> %a0, <8 x float> *%a1) { ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_rsqrtps: +; SKX: # BB#0: +; SKX-NEXT: vrsqrt14ps %ymm0, %ymm0 +; SKX-NEXT: vrsqrt14ps (%rdi), %ymm1 +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_rsqrtps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [7:2.00] @@ -3205,6 +3655,13 @@ define <4 x double> @test_shufpd(<4 x double> %a0, <4 x double> %a1, <4 x double ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_shufpd: +; SKX: # BB#0: +; SKX-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] +; SKX-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [1:1.00] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_shufpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:0.50] @@ -3250,6 +3707,12 @@ define <8 x float> @test_shufps(<8 x float> %a0, <8 x float> %a1, <8 x float> *% ; SKYLAKE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_shufps: +; SKX: # BB#0: +; SKX-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] +; SKX-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_shufps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:0.50] @@ -3296,6 +3759,13 @@ define <4 x double> @test_sqrtpd(<4 x double> %a0, <4 x double> *%a1) { ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_sqrtpd: +; SKX: # BB#0: +; SKX-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [18:1.00] +; SKX-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [18:1.00] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_sqrtpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [59:54.00] @@ -3346,6 +3816,13 @@ define <8 x float> @test_sqrtps(<8 x float> %a0, <8 x float> *%a1) { ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_sqrtps: +; SKX: # BB#0: +; SKX-NEXT: vsqrtps %ymm0, %ymm0 # sched: [12:1.00] +; SKX-NEXT: vsqrtps (%rdi), %ymm1 # sched: [12:1.00] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_sqrtps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vsqrtps (%rdi), %ymm1 # sched: [47:42.00] @@ -3392,6 +3869,12 @@ define <4 x double> @test_subpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SKYLAKE-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_subpd: +; SKX: # BB#0: +; SKX-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_subpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] @@ -3434,6 +3917,12 @@ define <8 x float> @test_subps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SKYLAKE-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_subps: +; SKX: # BB#0: +; SKX-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_subps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] @@ -3488,6 +3977,15 @@ define i32 @test_testpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_testpd: +; SKX: # BB#0: +; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25] +; SKX-NEXT: vtestpd %xmm1, %xmm0 # sched: [2:1.00] +; SKX-NEXT: setb %al # sched: [1:1.00] +; SKX-NEXT: vtestpd (%rdi), %xmm0 # sched: [2:1.00] +; SKX-NEXT: adcl $0, %eax # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_testpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: xorl %eax, %eax # sched: [1:0.50] @@ -3554,6 +4052,16 @@ define i32 @test_testpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_testpd_ymm: +; SKX: # BB#0: +; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25] +; SKX-NEXT: vtestpd %ymm1, %ymm0 # sched: [2:1.00] +; SKX-NEXT: setb %al # sched: [1:1.00] +; SKX-NEXT: vtestpd (%rdi), %ymm0 # sched: [2:1.00] +; SKX-NEXT: adcl $0, %eax # sched: [1:1.00] +; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_testpd_ymm: ; BTVER2: # BB#0: ; BTVER2-NEXT: xorl %eax, %eax # sched: [1:0.50] @@ -3617,6 +4125,15 @@ define i32 @test_testps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_testps: +; SKX: # BB#0: +; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25] +; SKX-NEXT: vtestps %xmm1, %xmm0 # sched: [2:1.00] +; SKX-NEXT: setb %al # sched: [1:1.00] +; SKX-NEXT: vtestps (%rdi), %xmm0 # sched: [2:1.00] +; SKX-NEXT: adcl $0, %eax # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_testps: ; BTVER2: # BB#0: ; BTVER2-NEXT: xorl %eax, %eax # sched: [1:0.50] @@ -3683,6 +4200,16 @@ define i32 @test_testps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_testps_ymm: +; SKX: # BB#0: +; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25] +; SKX-NEXT: vtestps %ymm1, %ymm0 # sched: [2:1.00] +; SKX-NEXT: setb %al # sched: [1:1.00] +; SKX-NEXT: vtestps (%rdi), %ymm0 # sched: [2:1.00] +; SKX-NEXT: adcl $0, %eax # sched: [1:1.00] +; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_testps_ymm: ; BTVER2: # BB#0: ; BTVER2-NEXT: xorl %eax, %eax # sched: [1:0.50] @@ -3738,6 +4265,13 @@ define <4 x double> @test_unpckhpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_unpckhpd: +; SKX: # BB#0: +; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] +; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [1:1.00] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_unpckhpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:0.50] @@ -3783,6 +4317,12 @@ define <8 x float> @test_unpckhps(<8 x float> %a0, <8 x float> %a1, <8 x float> ; SKYLAKE-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_unpckhps: +; SKX: # BB#0: +; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] +; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_unpckhps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:0.50] @@ -3829,6 +4369,13 @@ define <4 x double> @test_unpcklpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_unpcklpd: +; SKX: # BB#0: +; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] +; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [1:1.00] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_unpcklpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:0.50] @@ -3874,6 +4421,12 @@ define <8 x float> @test_unpcklps(<8 x float> %a0, <8 x float> %a1, <8 x float> ; SKYLAKE-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_unpcklps: +; SKX: # BB#0: +; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] +; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_unpcklps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:0.50] @@ -3920,6 +4473,13 @@ define <4 x double> @test_xorpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_xorpd: +; SKX: # BB#0: +; SKX-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_xorpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -3973,6 +4533,13 @@ define <8 x float> @test_xorps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_xorps: +; SKX: # BB#0: +; SKX-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_xorps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -4018,6 +4585,11 @@ define void @test_zeroall() { ; SKYLAKE-NEXT: vzeroall # sched: [16:4.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_zeroall: +; SKX: # BB#0: +; SKX-NEXT: vzeroall # sched: [16:4.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_zeroall: ; BTVER2: # BB#0: ; BTVER2-NEXT: vzeroall # sched: [90:?] @@ -4053,6 +4625,11 @@ define void @test_zeroupper() { ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_zeroupper: +; SKX: # BB#0: +; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_zeroupper: ; BTVER2: # BB#0: ; BTVER2-NEXT: vzeroupper # sched: [46:?] diff --git a/test/CodeGen/X86/avx2-schedule.ll b/test/CodeGen/X86/avx2-schedule.ll index a35aec79061..0932577c0c6 100644 --- a/test/CodeGen/X86/avx2-schedule.ll +++ b/test/CodeGen/X86/avx2-schedule.ll @@ -2,6 +2,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 define <8 x i32> @test_broadcasti128(<8 x i32> %a0, <4 x i32> *%a1) { @@ -23,6 +24,12 @@ define <8 x i32> @test_broadcasti128(<8 x i32> %a0, <4 x i32> *%a1) { ; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_broadcasti128: +; SKX: # BB#0: +; SKX-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [1:0.50] +; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_broadcasti128: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [8:0.50] @@ -53,6 +60,12 @@ define <4 x double> @test_broadcastsd_ymm(<2 x double> %a0) { ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_broadcastsd_ymm: +; SKX: # BB#0: +; SKX-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00] +; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_broadcastsd_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [100:0.25] @@ -82,6 +95,12 @@ define <4 x float> @test_broadcastss(<4 x float> %a0) { ; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_broadcastss: +; SKX: # BB#0: +; SKX-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_broadcastss: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:0.50] @@ -111,6 +130,12 @@ define <8 x float> @test_broadcastss_ymm(<4 x float> %a0) { ; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_broadcastss_ymm: +; SKX: # BB#0: +; SKX-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00] +; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_broadcastss_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [100:0.25] @@ -149,6 +174,15 @@ define <4 x i32> @test_extracti128(<8 x i32> %a0, <8 x i32> %a1, <4 x i32> *%a2) ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_extracti128: +; SKX: # BB#0: +; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.50] +; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00] +; SKX-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00] +; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_extracti128: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.25] @@ -181,6 +215,11 @@ define <2 x double> @test_gatherdpd(<2 x double> %a0, i8* %a1, <4 x i32> %a2, <2 ; SKYLAKE-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_gatherdpd: +; SKX: # BB#0: +; SKX-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_gatherdpd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?] @@ -206,6 +245,11 @@ define <4 x double> @test_gatherdpd_ymm(<4 x double> %a0, i8* %a1, <4 x i32> %a2 ; SKYLAKE-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [20:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_gatherdpd_ymm: +; SKX: # BB#0: +; SKX-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [20:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_gatherdpd_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [100:?] @@ -231,6 +275,11 @@ define <4 x float> @test_gatherdps(<4 x float> %a0, i8* %a1, <4 x i32> %a2, <4 x ; SKYLAKE-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_gatherdps: +; SKX: # BB#0: +; SKX-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_gatherdps: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?] @@ -256,6 +305,11 @@ define <8 x float> @test_gatherdps_ymm(<8 x float> %a0, i8* %a1, <8 x i32> %a2, ; SKYLAKE-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [20:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_gatherdps_ymm: +; SKX: # BB#0: +; SKX-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [20:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_gatherdps_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [100:?] @@ -281,6 +335,11 @@ define <2 x double> @test_gatherqpd(<2 x double> %a0, i8* %a1, <2 x i64> %a2, <2 ; SKYLAKE-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_gatherqpd: +; SKX: # BB#0: +; SKX-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_gatherqpd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?] @@ -306,6 +365,11 @@ define <4 x double> @test_gatherqpd_ymm(<4 x double> %a0, i8* %a1, <4 x i64> %a2 ; SKYLAKE-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [20:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_gatherqpd_ymm: +; SKX: # BB#0: +; SKX-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [20:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_gatherqpd_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [100:?] @@ -331,6 +395,11 @@ define <4 x float> @test_gatherqps(<4 x float> %a0, i8* %a1, <2 x i64> %a2, <4 x ; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_gatherqps: +; SKX: # BB#0: +; SKX-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_gatherqps: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?] @@ -359,6 +428,12 @@ define <4 x float> @test_gatherqps_ymm(<4 x float> %a0, i8* %a1, <4 x i64> %a2, ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_gatherqps_ymm: +; SKX: # BB#0: +; SKX-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [20:1.00] +; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_gatherqps_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [100:?] @@ -391,6 +466,13 @@ define <8 x i32> @test_inserti128(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) ; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_inserti128: +; SKX: # BB#0: +; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] +; SKX-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_inserti128: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [2:0.25] @@ -422,6 +504,11 @@ define <4 x i64> @test_movntdqa(i8* %a0) { ; SKYLAKE-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movntdqa: +; SKX: # BB#0: +; SKX-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_movntdqa: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [8:0.50] @@ -450,6 +537,12 @@ define <16 x i16> @test_mpsadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; SKYLAKE-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [4:2.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_mpsadbw: +; SKX: # BB#0: +; SKX-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [4:2.00] +; SKX-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [4:2.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_mpsadbw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [100:?] @@ -485,6 +578,13 @@ define <32 x i8> @test_pabsb(<32 x i8> %a0, <32 x i8> *%a1) { ; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pabsb: +; SKX: # BB#0: +; SKX-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpabsb (%rdi), %ymm1 # sched: [1:0.50] +; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pabsb: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50] @@ -521,6 +621,13 @@ define <8 x i32> @test_pabsd(<8 x i32> %a0, <8 x i32> *%a1) { ; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pabsd: +; SKX: # BB#0: +; SKX-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpabsd (%rdi), %ymm1 # sched: [1:0.50] +; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pabsd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50] @@ -557,6 +664,13 @@ define <16 x i16> @test_pabsw(<16 x i16> %a0, <16 x i16> *%a1) { ; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pabsw: +; SKX: # BB#0: +; SKX-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpabsw (%rdi), %ymm1 # sched: [1:0.50] +; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pabsw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50] @@ -590,6 +704,12 @@ define <16 x i16> @test_packssdw(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; SKYLAKE-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_packssdw: +; SKX: # BB#0: +; SKX-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_packssdw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -622,6 +742,12 @@ define <32 x i8> @test_packsswb(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; SKYLAKE-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_packsswb: +; SKX: # BB#0: +; SKX-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_packsswb: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -654,6 +780,12 @@ define <16 x i16> @test_packusdw(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; SKYLAKE-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_packusdw: +; SKX: # BB#0: +; SKX-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_packusdw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -686,6 +818,12 @@ define <32 x i8> @test_packuswb(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; SKYLAKE-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_packuswb: +; SKX: # BB#0: +; SKX-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_packuswb: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -718,6 +856,12 @@ define <32 x i8> @test_paddb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; SKYLAKE-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_paddb: +; SKX: # BB#0: +; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_paddb: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -748,6 +892,12 @@ define <8 x i32> @test_paddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; SKYLAKE-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_paddd: +; SKX: # BB#0: +; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_paddd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -778,6 +928,12 @@ define <4 x i64> @test_paddq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; SKYLAKE-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_paddq: +; SKX: # BB#0: +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_paddq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -808,6 +964,12 @@ define <32 x i8> @test_paddsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; SKYLAKE-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_paddsb: +; SKX: # BB#0: +; SKX-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_paddsb: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -839,6 +1001,12 @@ define <16 x i16> @test_paddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; SKYLAKE-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_paddsw: +; SKX: # BB#0: +; SKX-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_paddsw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -870,6 +1038,12 @@ define <32 x i8> @test_paddusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; SKYLAKE-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_paddusb: +; SKX: # BB#0: +; SKX-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_paddusb: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -901,6 +1075,12 @@ define <16 x i16> @test_paddusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; SKYLAKE-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_paddusw: +; SKX: # BB#0: +; SKX-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_paddusw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -932,6 +1112,12 @@ define <16 x i16> @test_paddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { ; SKYLAKE-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_paddw: +; SKX: # BB#0: +; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_paddw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -962,6 +1148,12 @@ define <32 x i8> @test_palignr(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; SKYLAKE-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_palignr: +; SKX: # BB#0: +; SKX-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] +; SKX-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_palignr: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:0.25] @@ -995,6 +1187,13 @@ define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pand: +; SKX: # BB#0: +; SKX-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pand: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -1030,6 +1229,13 @@ define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pandn: +; SKX: # BB#0: +; SKX-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [1:0.50] +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pandn: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -1064,6 +1270,12 @@ define <32 x i8> @test_pavgb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; SKYLAKE-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pavgb: +; SKX: # BB#0: +; SKX-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pavgb: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -1104,6 +1316,12 @@ define <16 x i16> @test_pavgw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { ; SKYLAKE-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pavgw: +; SKX: # BB#0: +; SKX-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pavgw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -1147,6 +1365,13 @@ define <4 x i32> @test_pblendd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pblendd: +; SKX: # BB#0: +; SKX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.50] +; SKX-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [1:0.50] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pblendd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.50] @@ -1182,6 +1407,13 @@ define <8 x i32> @test_pblendd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pblendd_ymm: +; SKX: # BB#0: +; SKX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50] +; SKX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [1:0.50] +; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pblendd_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50] @@ -1214,6 +1446,12 @@ define <32 x i8> @test_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2, <32 ; SKYLAKE-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [2:0.67] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pblendvb: +; SKX: # BB#0: +; SKX-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67] +; SKX-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [2:0.67] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pblendvb: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00] @@ -1245,6 +1483,12 @@ define <16 x i16> @test_pblendw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; SKYLAKE-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pblendw: +; SKX: # BB#0: +; SKX-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00] +; SKX-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pblendw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [2:0.33] @@ -1278,6 +1522,13 @@ define <16 x i8> @test_pbroadcastb(<16 x i8> %a0, <16 x i8> *%a1) { ; SKYLAKE-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pbroadcastb: +; SKX: # BB#0: +; SKX-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00] +; SKX-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [1:1.00] +; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pbroadcastb: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [8:1.00] @@ -1313,6 +1564,13 @@ define <32 x i8> @test_pbroadcastb_ymm(<32 x i8> %a0, <32 x i8> *%a1) { ; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pbroadcastb_ymm: +; SKX: # BB#0: +; SKX-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00] +; SKX-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [1:1.00] +; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pbroadcastb_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:2.00] @@ -1348,6 +1606,12 @@ define <4 x i32> @test_pbroadcastd(<4 x i32> %a0, <4 x i32> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pbroadcastd: +; SKX: # BB#0: +; SKX-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm0 +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pbroadcastd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [8:0.50] @@ -1383,6 +1647,12 @@ define <8 x i32> @test_pbroadcastd_ymm(<8 x i32> %a0, <8 x i32> *%a1) { ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pbroadcastd_ymm: +; SKX: # BB#0: +; SKX-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00] +; SKX-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm0 +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pbroadcastd_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [8:0.50] @@ -1418,6 +1688,12 @@ define <2 x i64> @test_pbroadcastq(<2 x i64> %a0, <2 x i64> *%a1) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pbroadcastq: +; SKX: # BB#0: +; SKX-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpaddq (%rdi){1to2}, %xmm0, %xmm0 +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pbroadcastq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [8:0.50] @@ -1453,6 +1729,12 @@ define <4 x i64> @test_pbroadcastq_ymm(<4 x i64> %a0, <4 x i64> *%a1) { ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pbroadcastq_ymm: +; SKX: # BB#0: +; SKX-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00] +; SKX-NEXT: vpaddq (%rdi){1to4}, %ymm0, %ymm0 +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pbroadcastq_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [8:0.50] @@ -1488,6 +1770,13 @@ define <8 x i16> @test_pbroadcastw(<8 x i16> %a0, <8 x i16> *%a1) { ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pbroadcastw: +; SKX: # BB#0: +; SKX-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00] +; SKX-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [1:1.00] +; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pbroadcastw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [8:1.00] @@ -1523,6 +1812,13 @@ define <16 x i16> @test_pbroadcastw_ymm(<16 x i16> %a0, <16 x i16> *%a1) { ; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pbroadcastw_ymm: +; SKX: # BB#0: +; SKX-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00] +; SKX-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [1:1.00] +; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pbroadcastw_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:2.00] @@ -1555,6 +1851,14 @@ define <32 x i8> @test_pcmpeqb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; SKYLAKE-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pcmpeqb: +; SKX: # BB#0: +; SKX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 +; SKX-NEXT: vpmovm2b %k0, %ymm0 +; SKX-NEXT: vpcmpeqb (%rdi), %ymm0, %k0 +; SKX-NEXT: vpmovm2b %k0, %ymm0 +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pcmpeqb: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -1587,6 +1891,14 @@ define <8 x i32> @test_pcmpeqd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; SKYLAKE-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pcmpeqd: +; SKX: # BB#0: +; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 +; SKX-NEXT: vpmovm2d %k0, %ymm0 +; SKX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 +; SKX-NEXT: vpmovm2d %k0, %ymm0 +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pcmpeqd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -1619,6 +1931,14 @@ define <4 x i64> @test_pcmpeqq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; SKYLAKE-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pcmpeqq: +; SKX: # BB#0: +; SKX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 +; SKX-NEXT: vpmovm2q %k0, %ymm0 +; SKX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0 +; SKX-NEXT: vpmovm2q %k0, %ymm0 +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pcmpeqq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -1651,6 +1971,14 @@ define <16 x i16> @test_pcmpeqw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; SKYLAKE-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pcmpeqw: +; SKX: # BB#0: +; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 +; SKX-NEXT: vpmovm2w %k0, %ymm0 +; SKX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0 +; SKX-NEXT: vpmovm2w %k0, %ymm0 +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pcmpeqw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -1683,6 +2011,14 @@ define <32 x i8> @test_pcmpgtb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; SKYLAKE-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pcmpgtb: +; SKX: # BB#0: +; SKX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 +; SKX-NEXT: vpmovm2b %k0, %ymm0 +; SKX-NEXT: vpcmpgtb (%rdi), %ymm0, %k0 +; SKX-NEXT: vpmovm2b %k0, %ymm0 +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pcmpgtb: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -1715,6 +2051,14 @@ define <8 x i32> @test_pcmpgtd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; SKYLAKE-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pcmpgtd: +; SKX: # BB#0: +; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 +; SKX-NEXT: vpmovm2d %k0, %ymm0 +; SKX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0 +; SKX-NEXT: vpmovm2d %k0, %ymm0 +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pcmpgtd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -1747,6 +2091,14 @@ define <4 x i64> @test_pcmpgtq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; SKYLAKE-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [3:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pcmpgtq: +; SKX: # BB#0: +; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 +; SKX-NEXT: vpmovm2q %k0, %ymm0 +; SKX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 +; SKX-NEXT: vpmovm2q %k0, %ymm0 +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pcmpgtq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -1779,6 +2131,14 @@ define <16 x i16> @test_pcmpgtw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; SKYLAKE-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pcmpgtw: +; SKX: # BB#0: +; SKX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 +; SKX-NEXT: vpmovm2w %k0, %ymm0 +; SKX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0 +; SKX-NEXT: vpmovm2w %k0, %ymm0 +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pcmpgtw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -1814,6 +2174,13 @@ define <4 x i64> @test_perm2i128(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_perm2i128: +; SKX: # BB#0: +; SKX-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] +; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_perm2i128: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [2:0.25] @@ -1849,6 +2216,13 @@ define <8 x i32> @test_permd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_permd: +; SKX: # BB#0: +; SKX-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] +; SKX-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_permd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [2:0.25] @@ -1885,6 +2259,13 @@ define <4 x double> @test_permpd(<4 x double> %a0, <4 x double> *%a1) { ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_permpd: +; SKX: # BB#0: +; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] +; SKX-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [3:1.00] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_permpd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [107:0.50] @@ -1920,6 +2301,13 @@ define <8 x float> @test_permps(<8 x i32> %a0, <8 x float> %a1, <8 x float> *%a2 ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_permps: +; SKX: # BB#0: +; SKX-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] +; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_permps: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [100:0.25] @@ -1956,6 +2344,13 @@ define <4 x i64> @test_permq(<4 x i64> %a0, <4 x i64> *%a1) { ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_permq: +; SKX: # BB#0: +; SKX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] +; SKX-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [3:1.00] +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_permq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:0.50] @@ -1985,6 +2380,11 @@ define <4 x i32> @test_pgatherdd(<4 x i32> %a0, i8* %a1, <4 x i32> %a2, <4 x i32 ; SKYLAKE-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pgatherdd: +; SKX: # BB#0: +; SKX-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pgatherdd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?] @@ -2010,6 +2410,11 @@ define <8 x i32> @test_pgatherdd_ymm(<8 x i32> %a0, i8* %a1, <8 x i32> %a2, <8 x ; SKYLAKE-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [20:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pgatherdd_ymm: +; SKX: # BB#0: +; SKX-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [20:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pgatherdd_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [100:?] @@ -2035,6 +2440,11 @@ define <2 x i64> @test_pgatherdq(<2 x i64> %a0, i8* %a1, <4 x i32> %a2, <2 x i64 ; SKYLAKE-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pgatherdq: +; SKX: # BB#0: +; SKX-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pgatherdq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?] @@ -2060,6 +2470,11 @@ define <4 x i64> @test_pgatherdq_ymm(<4 x i64> %a0, i8* %a1, <4 x i32> %a2, <4 x ; SKYLAKE-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [20:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pgatherdq_ymm: +; SKX: # BB#0: +; SKX-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [20:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pgatherdq_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [100:?] @@ -2085,6 +2500,11 @@ define <4 x i32> @test_pgatherqd(<4 x i32> %a0, i8* %a1, <2 x i64> %a2, <4 x i32 ; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pgatherqd: +; SKX: # BB#0: +; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pgatherqd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?] @@ -2113,6 +2533,12 @@ define <4 x i32> @test_pgatherqd_ymm(<4 x i32> %a0, i8* %a1, <4 x i64> %a2, <4 x ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pgatherqd_ymm: +; SKX: # BB#0: +; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [20:1.00] +; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pgatherqd_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [100:?] @@ -2139,6 +2565,11 @@ define <2 x i64> @test_pgatherqq(<2 x i64> %a0, i8 *%a1, <2 x i64> %a2, <2 x i64 ; SKYLAKE-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pgatherqq: +; SKX: # BB#0: +; SKX-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pgatherqq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?] @@ -2164,6 +2595,11 @@ define <4 x i64> @test_pgatherqq_ymm(<4 x i64> %a0, i8 *%a1, <4 x i64> %a2, <4 x ; SKYLAKE-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [20:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pgatherqq_ymm: +; SKX: # BB#0: +; SKX-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [20:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pgatherqq_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [100:?] @@ -2192,6 +2628,12 @@ define <8 x i32> @test_phaddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; SKYLAKE-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_phaddd: +; SKX: # BB#0: +; SKX-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; SKX-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_phaddd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [100:?] @@ -2223,6 +2665,12 @@ define <16 x i16> @test_phaddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; SKYLAKE-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_phaddsw: +; SKX: # BB#0: +; SKX-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; SKX-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_phaddsw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [100:?] @@ -2254,6 +2702,12 @@ define <16 x i16> @test_phaddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; SKYLAKE-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_phaddw: +; SKX: # BB#0: +; SKX-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; SKX-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_phaddw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [100:?] @@ -2285,6 +2739,12 @@ define <8 x i32> @test_phsubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; SKYLAKE-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_phsubd: +; SKX: # BB#0: +; SKX-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; SKX-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_phsubd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [100:?] @@ -2316,6 +2776,12 @@ define <16 x i16> @test_phsubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; SKYLAKE-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_phsubsw: +; SKX: # BB#0: +; SKX-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; SKX-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_phsubsw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [100:?] @@ -2347,6 +2813,12 @@ define <16 x i16> @test_phsubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; SKYLAKE-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_phsubw: +; SKX: # BB#0: +; SKX-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; SKX-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_phsubw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [100:?] @@ -2378,6 +2850,12 @@ define <16 x i16> @test_pmaddubsw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) ; SKYLAKE-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmaddubsw: +; SKX: # BB#0: +; SKX-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmaddubsw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:1.00] @@ -2410,6 +2888,12 @@ define <8 x i32> @test_pmaddwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; SKYLAKE-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmaddwd: +; SKX: # BB#0: +; SKX-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmaddwd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:1.00] @@ -2445,6 +2929,13 @@ define <4 x i32> @test_pmaskmovd(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) { ; SKYLAKE-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmaskmovd: +; SKX: # BB#0: +; SKX-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [1:0.50] +; SKX-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmaskmovd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [100:?] @@ -2480,6 +2971,13 @@ define <8 x i32> @test_pmaskmovd_ymm(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) { ; SKYLAKE-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmaskmovd_ymm: +; SKX: # BB#0: +; SKX-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [1:0.50] +; SKX-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmaskmovd_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [100:?] @@ -2515,6 +3013,13 @@ define <2 x i64> @test_pmaskmovq(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) { ; SKYLAKE-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmaskmovq: +; SKX: # BB#0: +; SKX-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [1:0.50] +; SKX-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmaskmovq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:1.00] @@ -2550,6 +3055,13 @@ define <4 x i64> @test_pmaskmovq_ymm(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) { ; SKYLAKE-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmaskmovq_ymm: +; SKX: # BB#0: +; SKX-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [1:0.50] +; SKX-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmaskmovq_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:1.50] @@ -2582,6 +3094,12 @@ define <32 x i8> @test_pmaxsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; SKYLAKE-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmaxsb: +; SKX: # BB#0: +; SKX-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmaxsb: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -2613,6 +3131,12 @@ define <8 x i32> @test_pmaxsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; SKYLAKE-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmaxsd: +; SKX: # BB#0: +; SKX-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmaxsd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -2644,6 +3168,12 @@ define <16 x i16> @test_pmaxsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; SKYLAKE-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmaxsw: +; SKX: # BB#0: +; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmaxsw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -2675,6 +3205,12 @@ define <32 x i8> @test_pmaxub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; SKYLAKE-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmaxub: +; SKX: # BB#0: +; SKX-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmaxub: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -2706,6 +3242,12 @@ define <8 x i32> @test_pmaxud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; SKYLAKE-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmaxud: +; SKX: # BB#0: +; SKX-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmaxud: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -2737,6 +3279,12 @@ define <16 x i16> @test_pmaxuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; SKYLAKE-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmaxuw: +; SKX: # BB#0: +; SKX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmaxuw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -2768,6 +3316,12 @@ define <32 x i8> @test_pminsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; SKYLAKE-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pminsb: +; SKX: # BB#0: +; SKX-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pminsb: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -2799,6 +3353,12 @@ define <8 x i32> @test_pminsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; SKYLAKE-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pminsd: +; SKX: # BB#0: +; SKX-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pminsd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -2830,6 +3390,12 @@ define <16 x i16> @test_pminsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; SKYLAKE-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pminsw: +; SKX: # BB#0: +; SKX-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pminsw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -2861,6 +3427,12 @@ define <32 x i8> @test_pminub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; SKYLAKE-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pminub: +; SKX: # BB#0: +; SKX-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pminub: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -2892,6 +3464,12 @@ define <8 x i32> @test_pminud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; SKYLAKE-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pminud: +; SKX: # BB#0: +; SKX-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pminud: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -2923,6 +3501,12 @@ define <16 x i16> @test_pminuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; SKYLAKE-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pminuw: +; SKX: # BB#0: +; SKX-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pminuw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -2954,6 +3538,12 @@ define i32 @test_pmovmskb(<32 x i8> %a0) { ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmovmskb: +; SKX: # BB#0: +; SKX-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00] +; SKX-NEXT: vzeroupper # sched: [4:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmovmskb: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00] @@ -2986,6 +3576,13 @@ define <8 x i32> @test_pmovsxbd(<16 x i8> %a0, <16 x i8> *%a1) { ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmovsxbd: +; SKX: # BB#0: +; SKX-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00] +; SKX-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [3:1.00] +; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmovsxbd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:0.50] @@ -3023,6 +3620,13 @@ define <4 x i64> @test_pmovsxbq(<16 x i8> %a0, <16 x i8> *%a1) { ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmovsxbq: +; SKX: # BB#0: +; SKX-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00] +; SKX-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [3:1.00] +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmovsxbq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:0.50] @@ -3060,6 +3664,13 @@ define <16 x i16> @test_pmovsxbw(<16 x i8> %a0, <16 x i8> *%a1) { ; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmovsxbw: +; SKX: # BB#0: +; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00] +; SKX-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [3:1.00] +; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmovsxbw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:0.50] @@ -3095,6 +3706,13 @@ define <4 x i64> @test_pmovsxdq(<4 x i32> %a0, <4 x i32> *%a1) { ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmovsxdq: +; SKX: # BB#0: +; SKX-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00] +; SKX-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [3:1.00] +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmovsxdq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:0.50] @@ -3130,6 +3748,13 @@ define <8 x i32> @test_pmovsxwd(<8 x i16> %a0, <8 x i16> *%a1) { ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmovsxwd: +; SKX: # BB#0: +; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] +; SKX-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [3:1.00] +; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmovsxwd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:0.50] @@ -3165,6 +3790,13 @@ define <4 x i64> @test_pmovsxwq(<8 x i16> %a0, <8 x i16> *%a1) { ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmovsxwq: +; SKX: # BB#0: +; SKX-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00] +; SKX-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [3:1.00] +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmovsxwq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:0.50] @@ -3202,6 +3834,13 @@ define <8 x i32> @test_pmovzxbd(<16 x i8> %a0, <16 x i8> *%a1) { ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmovzxbd: +; SKX: # BB#0: +; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] +; SKX-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [3:1.00] +; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmovzxbd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:0.50] @@ -3239,6 +3878,13 @@ define <4 x i64> @test_pmovzxbq(<16 x i8> %a0, <16 x i8> *%a1) { ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmovzxbq: +; SKX: # BB#0: +; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] +; SKX-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmovzxbq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50] @@ -3276,6 +3922,13 @@ define <16 x i16> @test_pmovzxbw(<16 x i8> %a0, <16 x i8> *%a1) { ; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmovzxbw: +; SKX: # BB#0: +; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] +; SKX-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [3:1.00] +; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmovzxbw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:0.50] @@ -3311,6 +3964,13 @@ define <4 x i64> @test_pmovzxdq(<4 x i32> %a0, <4 x i32> *%a1) { ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmovzxdq: +; SKX: # BB#0: +; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] +; SKX-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [3:1.00] +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmovzxdq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50] @@ -3346,6 +4006,13 @@ define <8 x i32> @test_pmovzxwd(<8 x i16> %a0, <8 x i16> *%a1) { ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmovzxwd: +; SKX: # BB#0: +; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] +; SKX-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [3:1.00] +; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmovzxwd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50] @@ -3381,6 +4048,13 @@ define <4 x i64> @test_pmovzxwq(<8 x i16> %a0, <8 x i16> *%a1) { ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmovzxwq: +; SKX: # BB#0: +; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00] +; SKX-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [3:1.00] +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmovzxwq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50] @@ -3415,6 +4089,12 @@ define <4 x i64> @test_pmuldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; SKYLAKE-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmuldq: +; SKX: # BB#0: +; SKX-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmuldq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:1.00] @@ -3447,6 +4127,12 @@ define <16 x i16> @test_pmulhrsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2 ; SKYLAKE-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmulhrsw: +; SKX: # BB#0: +; SKX-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmulhrsw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:1.00] @@ -3478,6 +4164,12 @@ define <16 x i16> @test_pmulhuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; SKYLAKE-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmulhuw: +; SKX: # BB#0: +; SKX-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmulhuw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:1.00] @@ -3509,6 +4201,12 @@ define <16 x i16> @test_pmulhw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; SKYLAKE-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmulhw: +; SKX: # BB#0: +; SKX-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmulhw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:1.00] @@ -3540,6 +4238,12 @@ define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; SKYLAKE-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [8:0.67] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmulld: +; SKX: # BB#0: +; SKX-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [8:0.67] +; SKX-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [8:0.67] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmulld: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:2.00] @@ -3570,6 +4274,12 @@ define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; SKYLAKE-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmullw: +; SKX: # BB#0: +; SKX-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmullw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:1.00] @@ -3600,6 +4310,12 @@ define <4 x i64> @test_pmuludq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; SKYLAKE-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmuludq: +; SKX: # BB#0: +; SKX-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pmuludq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:1.00] @@ -3635,6 +4351,13 @@ define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_por: +; SKX: # BB#0: +; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_por: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -3667,6 +4390,12 @@ define <4 x i64> @test_psadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; SKYLAKE-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [3:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psadbw: +; SKX: # BB#0: +; SKX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; SKX-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psadbw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [4:1.00] @@ -3699,6 +4428,12 @@ define <32 x i8> @test_pshufb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; SKYLAKE-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pshufb: +; SKX: # BB#0: +; SKX-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pshufb: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -3733,6 +4468,13 @@ define <8 x i32> @test_pshufd(<8 x i32> %a0, <8 x i32> *%a1) { ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pshufd: +; SKX: # BB#0: +; SKX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] +; SKX-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [1:1.00] +; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pshufd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:0.50] @@ -3768,6 +4510,13 @@ define <16 x i16> @test_pshufhw(<16 x i16> %a0, <16 x i16> *%a1) { ; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pshufhw: +; SKX: # BB#0: +; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00] +; SKX-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [1:1.00] +; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pshufhw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:0.50] @@ -3803,6 +4552,13 @@ define <16 x i16> @test_pshuflw(<16 x i16> %a0, <16 x i16> *%a1) { ; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pshuflw: +; SKX: # BB#0: +; SKX-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00] +; SKX-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [1:1.00] +; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pshuflw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:0.50] @@ -3835,6 +4591,12 @@ define <32 x i8> @test_psignb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; SKYLAKE-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psignb: +; SKX: # BB#0: +; SKX-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psignb: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -3866,6 +4628,12 @@ define <8 x i32> @test_psignd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; SKYLAKE-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psignd: +; SKX: # BB#0: +; SKX-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psignd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -3897,6 +4665,12 @@ define <16 x i16> @test_psignw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; SKYLAKE-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psignw: +; SKX: # BB#0: +; SKX-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psignw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -3931,6 +4705,13 @@ define <8 x i32> @test_pslld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pslld: +; SKX: # BB#0: +; SKX-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] +; SKX-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pslld: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [2:1.00] @@ -3961,6 +4742,11 @@ define <32 x i8> @test_pslldq(<32 x i8> %a0) { ; SKYLAKE-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pslldq: +; SKX: # BB#0: +; SKX-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pslldq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [2:1.00] @@ -3991,6 +4777,13 @@ define <4 x i64> @test_psllq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psllq: +; SKX: # BB#0: +; SKX-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] +; SKX-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psllq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [2:1.00] @@ -4024,6 +4817,12 @@ define <4 x i32> @test_psllvd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psllvd: +; SKX: # BB#0: +; SKX-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psllvd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -4055,6 +4854,12 @@ define <8 x i32> @test_psllvd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) ; SKYLAKE-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psllvd_ymm: +; SKX: # BB#0: +; SKX-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psllvd_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -4086,6 +4891,12 @@ define <2 x i64> @test_psllvq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psllvq: +; SKX: # BB#0: +; SKX-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psllvq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -4117,6 +4928,12 @@ define <4 x i64> @test_psllvq_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) ; SKYLAKE-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psllvq_ymm: +; SKX: # BB#0: +; SKX-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psllvq_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -4151,6 +4968,13 @@ define <16 x i16> @test_psllw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psllw: +; SKX: # BB#0: +; SKX-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] +; SKX-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psllw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [2:1.00] @@ -4187,6 +5011,13 @@ define <8 x i32> @test_psrad(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psrad: +; SKX: # BB#0: +; SKX-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00] +; SKX-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psrad: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [2:1.00] @@ -4220,6 +5051,12 @@ define <4 x i32> @test_psravd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psravd: +; SKX: # BB#0: +; SKX-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psravd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -4251,6 +5088,12 @@ define <8 x i32> @test_psravd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) ; SKYLAKE-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psravd_ymm: +; SKX: # BB#0: +; SKX-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psravd_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -4285,6 +5128,13 @@ define <16 x i16> @test_psraw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psraw: +; SKX: # BB#0: +; SKX-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] +; SKX-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psraw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [2:1.00] @@ -4321,6 +5171,13 @@ define <8 x i32> @test_psrld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psrld: +; SKX: # BB#0: +; SKX-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] +; SKX-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psrld: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [2:1.00] @@ -4351,6 +5208,11 @@ define <32 x i8> @test_psrldq(<32 x i8> %a0) { ; SKYLAKE-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psrldq: +; SKX: # BB#0: +; SKX-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psrldq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [2:1.00] @@ -4381,6 +5243,13 @@ define <4 x i64> @test_psrlq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psrlq: +; SKX: # BB#0: +; SKX-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] +; SKX-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psrlq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [2:1.00] @@ -4414,6 +5283,12 @@ define <4 x i32> @test_psrlvd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psrlvd: +; SKX: # BB#0: +; SKX-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psrlvd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -4445,6 +5320,12 @@ define <8 x i32> @test_psrlvd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) ; SKYLAKE-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psrlvd_ymm: +; SKX: # BB#0: +; SKX-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psrlvd_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -4476,6 +5357,12 @@ define <2 x i64> @test_psrlvq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psrlvq: +; SKX: # BB#0: +; SKX-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psrlvq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -4507,6 +5394,12 @@ define <4 x i64> @test_psrlvq_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) ; SKYLAKE-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psrlvq_ymm: +; SKX: # BB#0: +; SKX-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psrlvq_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -4541,6 +5434,13 @@ define <16 x i16> @test_psrlw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psrlw: +; SKX: # BB#0: +; SKX-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] +; SKX-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psrlw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [2:1.00] @@ -4574,6 +5474,12 @@ define <32 x i8> @test_psubb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; SKYLAKE-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psubb: +; SKX: # BB#0: +; SKX-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psubb: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -4604,6 +5510,12 @@ define <8 x i32> @test_psubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; SKYLAKE-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psubd: +; SKX: # BB#0: +; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psubd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -4634,6 +5546,12 @@ define <4 x i64> @test_psubq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; SKYLAKE-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psubq: +; SKX: # BB#0: +; SKX-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psubq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -4664,6 +5582,12 @@ define <32 x i8> @test_psubsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; SKYLAKE-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psubsb: +; SKX: # BB#0: +; SKX-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psubsb: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -4695,6 +5619,12 @@ define <16 x i16> @test_psubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; SKYLAKE-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psubsw: +; SKX: # BB#0: +; SKX-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psubsw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -4726,6 +5656,12 @@ define <32 x i8> @test_psubusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; SKYLAKE-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psubusb: +; SKX: # BB#0: +; SKX-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psubusb: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -4757,6 +5693,12 @@ define <16 x i16> @test_psubusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; SKYLAKE-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psubusw: +; SKX: # BB#0: +; SKX-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SKX-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psubusw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -4788,6 +5730,12 @@ define <16 x i16> @test_psubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { ; SKYLAKE-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psubw: +; SKX: # BB#0: +; SKX-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_psubw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] @@ -4818,6 +5766,12 @@ define <32 x i8> @test_punpckhbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_punpckhbw: +; SKX: # BB#0: +; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00] +; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_punpckhbw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:0.25] @@ -4854,6 +5808,14 @@ define <8 x i32> @test_punpckhdq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_punpckhdq: +; SKX: # BB#0: +; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] +; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [1:1.00] +; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:1.00] +; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_punpckhdq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:0.25] @@ -4890,6 +5852,13 @@ define <4 x i64> @test_punpckhqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) ; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_punpckhqdq: +; SKX: # BB#0: +; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] +; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [1:1.00] +; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_punpckhqdq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:0.25] @@ -4922,6 +5891,12 @@ define <16 x i16> @test_punpckhwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a ; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_punpckhwd: +; SKX: # BB#0: +; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00] +; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_punpckhwd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:0.25] @@ -4952,6 +5927,12 @@ define <32 x i8> @test_punpcklbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_punpcklbw: +; SKX: # BB#0: +; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00] +; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_punpcklbw: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:0.25] @@ -4988,6 +5969,14 @@ define <8 x i32> @test_punpckldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_punpckldq: +; SKX: # BB#0: +; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] +; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00] +; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:1.00] +; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_punpckldq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:0.25] @@ -5024,6 +6013,13 @@ define <4 x i64> @test_punpcklqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) ; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_punpcklqdq: +; SKX: # BB#0: +; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] +; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [1:1.00] +; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_punpcklqdq: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:0.25] @@ -5056,6 +6052,12 @@ define <16 x i16> @test_punpcklwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a ; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_punpcklwd: +; SKX: # BB#0: +; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00] +; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_punpcklwd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:0.25] @@ -5089,6 +6091,13 @@ define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pxor: +; SKX: # BB#0: +; SKX-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_pxor: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.25] diff --git a/test/CodeGen/X86/fma-schedule.ll b/test/CodeGen/X86/fma-schedule.ll index c127451b8cb..6d8bc2dc877 100644 --- a/test/CodeGen/X86/fma-schedule.ll +++ b/test/CodeGen/X86/fma-schedule.ll @@ -3,6 +3,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 ; @@ -38,6 +39,12 @@ define <2 x double> @test_vfmadd213pd(<2 x double> %a0, <2 x double> %a1, <2 x d ; KNL-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfmadd213pd: +; SKX: # BB#0: +; SKX-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfmadd213pd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 @@ -74,6 +81,12 @@ define <4 x double> @test_vfmadd213pd_ymm(<4 x double> %a0, <4 x double> %a1, <4 ; KNL-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfmadd213pd_ymm: +; SKX: # BB#0: +; SKX-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfmadd213pd_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 @@ -110,6 +123,12 @@ define <4 x float> @test_vfmadd213ps(<4 x float> %a0, <4 x float> %a1, <4 x floa ; KNL-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfmadd213ps: +; SKX: # BB#0: +; SKX-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfmadd213ps: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 @@ -146,6 +165,12 @@ define <8 x float> @test_vfmadd213ps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x ; KNL-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfmadd213ps_ymm: +; SKX: # BB#0: +; SKX-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfmadd213ps_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 @@ -182,6 +207,12 @@ define <2 x double> @test_vfmadd213sd(<2 x double> %a0, <2 x double> %a1, <2 x d ; KNL-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfmadd213sd: +; SKX: # BB#0: +; SKX-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfmadd213sd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 @@ -218,6 +249,12 @@ define <4 x float> @test_vfmadd213ss(<4 x float> %a0, <4 x float> %a1, <4 x floa ; KNL-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfmadd213ss: +; SKX: # BB#0: +; SKX-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfmadd213ss: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 @@ -266,6 +303,12 @@ define <2 x double> @test_vfmaddsubpd(<2 x double> %a0, <2 x double> %a1, <2 x d ; KNL-NEXT: vfmaddsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfmaddsubpd: +; SKX: # BB#0: +; SKX-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vfmaddsub213pd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfmaddsubpd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 @@ -302,6 +345,12 @@ define <4 x double> @test_vfmaddsubpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 ; KNL-NEXT: vfmaddsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfmaddsubpd_ymm: +; SKX: # BB#0: +; SKX-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vfmaddsub213pd (%rdi), %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfmaddsubpd_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 @@ -338,6 +387,12 @@ define <4 x float> @test_vfmaddsubps(<4 x float> %a0, <4 x float> %a1, <4 x floa ; KNL-NEXT: vfmaddsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfmaddsubps: +; SKX: # BB#0: +; SKX-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vfmaddsub213ps (%rdi), %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfmaddsubps: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 @@ -374,6 +429,12 @@ define <8 x float> @test_vfmaddsubps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x ; KNL-NEXT: vfmaddsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfmaddsubps_ymm: +; SKX: # BB#0: +; SKX-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vfmaddsub213ps (%rdi), %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfmaddsubps_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 @@ -422,6 +483,12 @@ define <2 x double> @test_vfmsubaddpd(<2 x double> %a0, <2 x double> %a1, <2 x d ; KNL-NEXT: vfmsubadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfmsubaddpd: +; SKX: # BB#0: +; SKX-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vfmsubadd213pd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfmsubaddpd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 @@ -458,6 +525,12 @@ define <4 x double> @test_vfmsubaddpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 ; KNL-NEXT: vfmsubadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfmsubaddpd_ymm: +; SKX: # BB#0: +; SKX-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vfmsubadd213pd (%rdi), %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfmsubaddpd_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 @@ -494,6 +567,12 @@ define <4 x float> @test_vfmsubaddps(<4 x float> %a0, <4 x float> %a1, <4 x floa ; KNL-NEXT: vfmsubadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfmsubaddps: +; SKX: # BB#0: +; SKX-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vfmsubadd213ps (%rdi), %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfmsubaddps: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 @@ -530,6 +609,12 @@ define <8 x float> @test_vfmsubaddps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x ; KNL-NEXT: vfmsubadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfmsubaddps_ymm: +; SKX: # BB#0: +; SKX-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vfmsubadd213ps (%rdi), %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfmsubaddps_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 @@ -578,6 +663,12 @@ define <2 x double> @test_vfmsub213pd(<2 x double> %a0, <2 x double> %a1, <2 x d ; KNL-NEXT: vfmsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfmsub213pd: +; SKX: # BB#0: +; SKX-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vfmsub213pd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfmsub213pd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 @@ -614,6 +705,12 @@ define <4 x double> @test_vfmsub213pd_ymm(<4 x double> %a0, <4 x double> %a1, <4 ; KNL-NEXT: vfmsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfmsub213pd_ymm: +; SKX: # BB#0: +; SKX-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vfmsub213pd (%rdi), %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfmsub213pd_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 @@ -650,6 +747,12 @@ define <4 x float> @test_vfmsub213ps(<4 x float> %a0, <4 x float> %a1, <4 x floa ; KNL-NEXT: vfmsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfmsub213ps: +; SKX: # BB#0: +; SKX-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vfmsub213ps (%rdi), %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfmsub213ps: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 @@ -686,6 +789,12 @@ define <8 x float> @test_vfmsub213ps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x ; KNL-NEXT: vfmsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfmsub213ps_ymm: +; SKX: # BB#0: +; SKX-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vfmsub213ps (%rdi), %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfmsub213ps_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 @@ -722,6 +831,12 @@ define <2 x double> @test_vfmsub213sd(<2 x double> %a0, <2 x double> %a1, <2 x d ; KNL-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfmsub213sd: +; SKX: # BB#0: +; SKX-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfmsub213sd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 @@ -758,6 +873,12 @@ define <4 x float> @test_vfmsub213ss(<4 x float> %a0, <4 x float> %a1, <4 x floa ; KNL-NEXT: vfmsub213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfmsub213ss: +; SKX: # BB#0: +; SKX-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vfmsub213ss (%rdi), %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfmsub213ss: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 @@ -806,6 +927,12 @@ define <2 x double> @test_vfnmadd213pd(<2 x double> %a0, <2 x double> %a1, <2 x ; KNL-NEXT: vfnmadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfnmadd213pd: +; SKX: # BB#0: +; SKX-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vfnmadd213pd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfnmadd213pd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 @@ -842,6 +969,12 @@ define <4 x double> @test_vfnmadd213pd_ymm(<4 x double> %a0, <4 x double> %a1, < ; KNL-NEXT: vfnmadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfnmadd213pd_ymm: +; SKX: # BB#0: +; SKX-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vfnmadd213pd (%rdi), %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfnmadd213pd_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 @@ -878,6 +1011,12 @@ define <4 x float> @test_vfnmadd213ps(<4 x float> %a0, <4 x float> %a1, <4 x flo ; KNL-NEXT: vfnmadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfnmadd213ps: +; SKX: # BB#0: +; SKX-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vfnmadd213ps (%rdi), %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfnmadd213ps: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 @@ -914,6 +1053,12 @@ define <8 x float> @test_vfnmadd213ps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x ; KNL-NEXT: vfnmadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfnmadd213ps_ymm: +; SKX: # BB#0: +; SKX-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vfnmadd213ps (%rdi), %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfnmadd213ps_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 @@ -950,6 +1095,12 @@ define <2 x double> @test_vfnmadd213sd(<2 x double> %a0, <2 x double> %a1, <2 x ; KNL-NEXT: vfnmadd213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfnmadd213sd: +; SKX: # BB#0: +; SKX-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vfnmadd213sd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfnmadd213sd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 @@ -986,6 +1137,12 @@ define <4 x float> @test_vfnmadd213ss(<4 x float> %a0, <4 x float> %a1, <4 x flo ; KNL-NEXT: vfnmadd213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfnmadd213ss: +; SKX: # BB#0: +; SKX-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vfnmadd213ss (%rdi), %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfnmadd213ss: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 @@ -1034,6 +1191,12 @@ define <2 x double> @test_vfnmsub213pd(<2 x double> %a0, <2 x double> %a1, <2 x ; KNL-NEXT: vfnmsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfnmsub213pd: +; SKX: # BB#0: +; SKX-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vfnmsub213pd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfnmsub213pd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 @@ -1070,6 +1233,12 @@ define <4 x double> @test_vfnmsub213pd_ymm(<4 x double> %a0, <4 x double> %a1, < ; KNL-NEXT: vfnmsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfnmsub213pd_ymm: +; SKX: # BB#0: +; SKX-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vfnmsub213pd (%rdi), %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfnmsub213pd_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 @@ -1106,6 +1275,12 @@ define <4 x float> @test_vfnmsub213ps(<4 x float> %a0, <4 x float> %a1, <4 x flo ; KNL-NEXT: vfnmsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfnmsub213ps: +; SKX: # BB#0: +; SKX-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vfnmsub213ps (%rdi), %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfnmsub213ps: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 @@ -1142,6 +1317,12 @@ define <8 x float> @test_vfnmsub213ps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x ; KNL-NEXT: vfnmsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfnmsub213ps_ymm: +; SKX: # BB#0: +; SKX-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vfnmsub213ps (%rdi), %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfnmsub213ps_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 @@ -1178,6 +1359,12 @@ define <2 x double> @test_vfnmsub213sd(<2 x double> %a0, <2 x double> %a1, <2 x ; KNL-NEXT: vfnmsub213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfnmsub213sd: +; SKX: # BB#0: +; SKX-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vfnmsub213sd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfnmsub213sd: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 @@ -1214,6 +1401,12 @@ define <4 x float> @test_vfnmsub213ss(<4 x float> %a0, <4 x float> %a1, <4 x flo ; KNL-NEXT: vfnmsub213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_vfnmsub213ss: +; SKX: # BB#0: +; SKX-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vfnmsub213ss (%rdi), %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; ZNVER1-LABEL: test_vfnmsub213ss: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 diff --git a/test/CodeGen/X86/sse-schedule.ll b/test/CodeGen/X86/sse-schedule.ll index 9100809d373..aaa9ff51088 100644 --- a/test/CodeGen/X86/sse-schedule.ll +++ b/test/CodeGen/X86/sse-schedule.ll @@ -6,6 +6,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 @@ -46,6 +47,12 @@ define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SKYLAKE-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_addps: +; SKX: # BB#0: +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_addps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -100,6 +107,12 @@ define float @test_addss(float %a0, float %a1, float *%a2) { ; SKYLAKE-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_addss: +; SKX: # BB#0: +; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_addss: ; BTVER2: # BB#0: ; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -158,6 +171,12 @@ define <4 x float> @test_andps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SKYLAKE-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_andps: +; SKX: # BB#0: +; SKX-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_andps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -220,6 +239,12 @@ define <4 x float> @test_andnotps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SKYLAKE-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_andnotps: +; SKX: # BB#0: +; SKX-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_andnotps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -286,6 +311,14 @@ define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SKYLAKE-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cmpps: +; SKX: # BB#0: +; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 +; SKX-NEXT: vcmpeqps (%rdi), %xmm0, %k1 +; SKX-NEXT: korw %k1, %k0, %k0 +; SKX-NEXT: vpmovm2d %k0, %xmm0 +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cmpps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00] @@ -345,6 +378,12 @@ define float @test_cmpss(float %a0, float %a1, float *%a2) { ; SKYLAKE-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cmpss: +; SKX: # BB#0: +; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SKX-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cmpss: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -452,6 +491,20 @@ define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_comiss: +; SKX: # BB#0: +; SKX-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00] +; SKX-NEXT: setnp %al # sched: [1:1.00] +; SKX-NEXT: sete %cl # sched: [1:1.00] +; SKX-NEXT: andb %al, %cl # sched: [1:0.25] +; SKX-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00] +; SKX-NEXT: setnp %al # sched: [1:1.00] +; SKX-NEXT: sete %dl # sched: [1:1.00] +; SKX-NEXT: andb %al, %dl # sched: [1:0.25] +; SKX-NEXT: orb %cl, %dl # sched: [1:0.25] +; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_comiss: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00] @@ -530,6 +583,13 @@ define float @test_cvtsi2ss(i32 %a0, i32 *%a1) { ; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvtsi2ss: +; SKX: # BB#0: +; SKX-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00] +; SKX-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] +; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvtsi2ss: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [3:1.00] @@ -593,6 +653,13 @@ define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) { ; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvtsi2ssq: +; SKX: # BB#0: +; SKX-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00] +; SKX-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] +; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvtsi2ssq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [3:1.00] @@ -656,6 +723,13 @@ define i32 @test_cvtss2si(float %a0, float *%a1) { ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvtss2si: +; SKX: # BB#0: +; SKX-NEXT: vcvtss2si %xmm0, %ecx # sched: [6:1.00] +; SKX-NEXT: vcvtss2si (%rdi), %eax # sched: [6:1.00] +; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvtss2si: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcvtss2si (%rdi), %eax # sched: [8:1.00] @@ -722,6 +796,13 @@ define i64 @test_cvtss2siq(float %a0, float *%a1) { ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvtss2siq: +; SKX: # BB#0: +; SKX-NEXT: vcvtss2si %xmm0, %rcx # sched: [6:1.00] +; SKX-NEXT: vcvtss2si (%rdi), %rax # sched: [6:1.00] +; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvtss2siq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcvtss2si (%rdi), %rax # sched: [8:1.00] @@ -788,6 +869,13 @@ define i32 @test_cvttss2si(float %a0, float *%a1) { ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvttss2si: +; SKX: # BB#0: +; SKX-NEXT: vcvttss2si %xmm0, %ecx # sched: [7:1.00] +; SKX-NEXT: vcvttss2si (%rdi), %eax # sched: [6:1.00] +; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvttss2si: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcvttss2si (%rdi), %eax # sched: [8:1.00] @@ -851,6 +939,13 @@ define i64 @test_cvttss2siq(float %a0, float *%a1) { ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvttss2siq: +; SKX: # BB#0: +; SKX-NEXT: vcvttss2si %xmm0, %rcx # sched: [7:1.00] +; SKX-NEXT: vcvttss2si (%rdi), %rax # sched: [6:1.00] +; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvttss2siq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcvttss2si (%rdi), %rax # sched: [8:1.00] @@ -908,6 +1003,12 @@ define <4 x float> @test_divps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SKYLAKE-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_divps: +; SKX: # BB#0: +; SKX-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:1.00] +; SKX-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [11:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_divps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [19:19.00] @@ -962,6 +1063,12 @@ define float @test_divss(float %a0, float %a1, float *%a2) { ; SKYLAKE-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_divss: +; SKX: # BB#0: +; SKX-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:1.00] +; SKX-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [11:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_divss: ; BTVER2: # BB#0: ; BTVER2-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [19:19.00] @@ -1016,6 +1123,12 @@ define void @test_ldmxcsr(i32 %a0) { ; SKYLAKE-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_ldmxcsr: +; SKX: # BB#0: +; SKX-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; SKX-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_ldmxcsr: ; BTVER2: # BB#0: ; BTVER2-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] @@ -1072,6 +1185,12 @@ define <4 x float> @test_maxps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SKYLAKE-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_maxps: +; SKX: # BB#0: +; SKX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SKX-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_maxps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -1127,6 +1246,12 @@ define <4 x float> @test_maxss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SKYLAKE-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_maxss: +; SKX: # BB#0: +; SKX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_maxss: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -1182,6 +1307,12 @@ define <4 x float> @test_minps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SKYLAKE-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_minps: +; SKX: # BB#0: +; SKX-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SKX-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_minps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -1237,6 +1368,12 @@ define <4 x float> @test_minss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SKYLAKE-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_minss: +; SKX: # BB#0: +; SKX-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_minss: ; BTVER2: # BB#0: ; BTVER2-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -1298,6 +1435,13 @@ define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movaps: +; SKX: # BB#0: +; SKX-NEXT: vmovaps (%rdi), %xmm0 # sched: [1:0.50] +; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movaps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovaps (%rdi), %xmm0 # sched: [5:1.00] @@ -1356,6 +1500,11 @@ define <4 x float> @test_movhlps(<4 x float> %a0, <4 x float> %a1) { ; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movhlps: +; SKX: # BB#0: +; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movhlps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50] @@ -1416,6 +1565,13 @@ define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movhps: +; SKX: # BB#0: +; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movhps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] @@ -1478,6 +1634,12 @@ define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) { ; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movlhps: +; SKX: # BB#0: +; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movlhps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] @@ -1537,6 +1699,13 @@ define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; SKYLAKE-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movlps: +; SKX: # BB#0: +; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movlps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] @@ -1593,6 +1762,11 @@ define i32 @test_movmskps(<4 x float> %a0) { ; SKYLAKE-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movmskps: +; SKX: # BB#0: +; SKX-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movmskps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovmskps %xmm0, %eax # sched: [1:0.50] @@ -1644,6 +1818,11 @@ define void @test_movntps(<4 x float> %a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movntps: +; SKX: # BB#0: +; SKX-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movntps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] @@ -1700,6 +1879,13 @@ define void @test_movss_mem(float* %a0, float* %a1) { ; SKYLAKE-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movss_mem: +; SKX: # BB#0: +; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50] +; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movss_mem: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00] @@ -1756,6 +1942,11 @@ define <4 x float> @test_movss_reg(<4 x float> %a0, <4 x float> %a1) { ; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movss_reg: +; SKX: # BB#0: +; SKX-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movss_reg: ; BTVER2: # BB#0: ; BTVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50] @@ -1812,6 +2003,13 @@ define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movups: +; SKX: # BB#0: +; SKX-NEXT: vmovups (%rdi), %xmm0 # sched: [1:0.50] +; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movups: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovups (%rdi), %xmm0 # sched: [5:1.00] @@ -1868,6 +2066,12 @@ define <4 x float> @test_mulps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SKYLAKE-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_mulps: +; SKX: # BB#0: +; SKX-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_mulps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -1922,6 +2126,12 @@ define float @test_mulss(float %a0, float %a1, float *%a2) { ; SKYLAKE-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_mulss: +; SKX: # BB#0: +; SKX-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_mulss: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -1980,6 +2190,12 @@ define <4 x float> @test_orps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2 ; SKYLAKE-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_orps: +; SKX: # BB#0: +; SKX-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_orps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -2038,6 +2254,11 @@ define void @test_prefetchnta(i8* %a0) { ; SKYLAKE-NEXT: prefetchnta (%rdi) # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_prefetchnta: +; SKX: # BB#0: +; SKX-NEXT: prefetchnta (%rdi) # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_prefetchnta: ; BTVER2: # BB#0: ; BTVER2-NEXT: prefetchnta (%rdi) # sched: [5:1.00] @@ -2097,6 +2318,13 @@ define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_rcpps: +; SKX: # BB#0: +; SKX-NEXT: vrcp14ps %xmm0, %xmm0 +; SKX-NEXT: vrcp14ps (%rdi), %xmm1 +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_rcpps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vrcpps (%rdi), %xmm1 # sched: [7:1.00] @@ -2169,6 +2397,14 @@ define <4 x float> @test_rcpss(float %a0, float *%a1) { ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_rcpss: +; SKX: # BB#0: +; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] +; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50] +; SKX-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_rcpss: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00] @@ -2239,6 +2475,13 @@ define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_rsqrtps: +; SKX: # BB#0: +; SKX-NEXT: vrsqrt14ps %xmm0, %xmm0 +; SKX-NEXT: vrsqrt14ps (%rdi), %xmm1 +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_rsqrtps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [7:1.00] @@ -2311,6 +2554,14 @@ define <4 x float> @test_rsqrtss(float %a0, float *%a1) { ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_rsqrtss: +; SKX: # BB#0: +; SKX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] +; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50] +; SKX-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_rsqrtss: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00] @@ -2373,6 +2624,11 @@ define void @test_sfence() { ; SKYLAKE-NEXT: sfence # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_sfence: +; SKX: # BB#0: +; SKX-NEXT: sfence # sched: [1:0.33] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_sfence: ; BTVER2: # BB#0: ; BTVER2-NEXT: sfence # sched: [1:1.00] @@ -2428,6 +2684,12 @@ define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; SKYLAKE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_shufps: +; SKX: # BB#0: +; SKX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] +; SKX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_shufps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50] @@ -2489,6 +2751,13 @@ define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_sqrtps: +; SKX: # BB#0: +; SKX-NEXT: vsqrtps %xmm0, %xmm0 # sched: [12:1.00] +; SKX-NEXT: vsqrtps (%rdi), %xmm1 # sched: [12:1.00] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_sqrtps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vsqrtps (%rdi), %xmm1 # sched: [26:21.00] @@ -2561,6 +2830,14 @@ define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_sqrtss: +; SKX: # BB#0: +; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:1.00] +; SKX-NEXT: vmovaps (%rdi), %xmm1 # sched: [1:0.50] +; SKX-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [12:1.00] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_sqrtss: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovaps (%rdi), %xmm1 # sched: [5:1.00] @@ -2621,6 +2898,12 @@ define i32 @test_stmxcsr() { ; SKYLAKE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_stmxcsr: +; SKX: # BB#0: +; SKX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; SKX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_stmxcsr: ; BTVER2: # BB#0: ; BTVER2-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00] @@ -2677,6 +2960,12 @@ define <4 x float> @test_subps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SKYLAKE-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_subps: +; SKX: # BB#0: +; SKX-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_subps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -2731,6 +3020,12 @@ define float @test_subss(float %a0, float %a1, float *%a2) { ; SKYLAKE-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_subss: +; SKX: # BB#0: +; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_subss: ; BTVER2: # BB#0: ; BTVER2-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -2833,6 +3128,20 @@ define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_ucomiss: +; SKX: # BB#0: +; SKX-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00] +; SKX-NEXT: setnp %al # sched: [1:1.00] +; SKX-NEXT: sete %cl # sched: [1:1.00] +; SKX-NEXT: andb %al, %cl # sched: [1:0.25] +; SKX-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00] +; SKX-NEXT: setnp %al # sched: [1:1.00] +; SKX-NEXT: sete %dl # sched: [1:1.00] +; SKX-NEXT: andb %al, %dl # sched: [1:0.25] +; SKX-NEXT: orb %cl, %dl # sched: [1:0.25] +; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_ucomiss: ; BTVER2: # BB#0: ; BTVER2-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00] @@ -2909,6 +3218,12 @@ define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SKYLAKE-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_unpckhps: +; SKX: # BB#0: +; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_unpckhps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] @@ -2967,6 +3282,12 @@ define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SKYLAKE-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_unpcklps: +; SKX: # BB#0: +; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] +; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_unpcklps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] @@ -3025,6 +3346,12 @@ define <4 x float> @test_xorps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SKYLAKE-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_xorps: +; SKX: # BB#0: +; SKX-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_xorps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] diff --git a/test/CodeGen/X86/sse2-schedule.ll b/test/CodeGen/X86/sse2-schedule.ll index 8e9649de507..2e88e388469 100644 --- a/test/CodeGen/X86/sse2-schedule.ll +++ b/test/CodeGen/X86/sse2-schedule.ll @@ -6,6 +6,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 @@ -46,6 +47,12 @@ define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKYLAKE-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_addpd: +; SKX: # BB#0: +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_addpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -100,6 +107,12 @@ define double @test_addsd(double %a0, double %a1, double *%a2) { ; SKYLAKE-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_addsd: +; SKX: # BB#0: +; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_addsd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -160,6 +173,13 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_andpd: +; SKX: # BB#0: +; SKX-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_andpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -227,6 +247,13 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_andnotpd: +; SKX: # BB#0: +; SKX-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_andnotpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -296,6 +323,14 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKYLAKE-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cmppd: +; SKX: # BB#0: +; SKX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 +; SKX-NEXT: vcmpeqpd (%rdi), %xmm0, %k1 +; SKX-NEXT: korw %k1, %k0, %k0 +; SKX-NEXT: vpmovm2q %k0, %xmm0 +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cmppd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] @@ -355,6 +390,12 @@ define double @test_cmpsd(double %a0, double %a1, double *%a2) { ; SKYLAKE-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cmpsd: +; SKX: # BB#0: +; SKX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SKX-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cmpsd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -462,6 +503,20 @@ define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_comisd: +; SKX: # BB#0: +; SKX-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] +; SKX-NEXT: setnp %al # sched: [1:1.00] +; SKX-NEXT: sete %cl # sched: [1:1.00] +; SKX-NEXT: andb %al, %cl # sched: [1:0.25] +; SKX-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00] +; SKX-NEXT: setnp %al # sched: [1:1.00] +; SKX-NEXT: sete %dl # sched: [1:1.00] +; SKX-NEXT: andb %al, %dl # sched: [1:0.25] +; SKX-NEXT: orb %cl, %dl # sched: [1:0.25] +; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_comisd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] @@ -540,6 +595,13 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvtdq2pd: +; SKX: # BB#0: +; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00] +; SKX-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [5:1.00] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvtdq2pd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [8:1.00] @@ -606,6 +668,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) { ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvtdq2ps: +; SKX: # BB#0: +; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [4:0.50] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvtdq2ps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00] @@ -670,6 +739,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvtpd2dq: +; SKX: # BB#0: +; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00] +; SKX-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvtpd2dq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00] @@ -735,6 +811,13 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvtpd2ps: +; SKX: # BB#0: +; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00] +; SKX-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvtpd2ps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00] @@ -800,6 +883,13 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvtps2dq: +; SKX: # BB#0: +; SKX-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [4:0.50] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvtps2dq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [8:1.00] @@ -865,6 +955,13 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvtps2pd: +; SKX: # BB#0: +; SKX-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00] +; SKX-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [4:0.50] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvtps2pd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [8:1.00] @@ -930,6 +1027,13 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) { ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvtsd2si: +; SKX: # BB#0: +; SKX-NEXT: vcvtsd2si %xmm0, %ecx # sched: [6:1.00] +; SKX-NEXT: vcvtsd2si (%rdi), %eax # sched: [6:1.00] +; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvtsd2si: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcvtsd2si (%rdi), %eax # sched: [8:1.00] @@ -996,6 +1100,13 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) { ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvtsd2siq: +; SKX: # BB#0: +; SKX-NEXT: vcvtsd2si %xmm0, %rcx # sched: [6:1.00] +; SKX-NEXT: vcvtsd2si (%rdi), %rax # sched: [6:1.00] +; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvtsd2siq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcvtsd2si (%rdi), %rax # sched: [8:1.00] @@ -1069,6 +1180,14 @@ define float @test_cvtsd2ss(double %a0, double *%a1) { ; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvtsd2ss: +; SKX: # BB#0: +; SKX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] +; SKX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [1:0.50] +; SKX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] +; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvtsd2ss: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00] @@ -1134,6 +1253,13 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) { ; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvtsi2sd: +; SKX: # BB#0: +; SKX-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00] +; SKX-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] +; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvtsi2sd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [3:1.00] @@ -1197,6 +1323,13 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) { ; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvtsi2sdq: +; SKX: # BB#0: +; SKX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00] +; SKX-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] +; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvtsi2sdq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [3:1.00] @@ -1269,6 +1402,14 @@ define double @test_cvtss2sd(float %a0, float *%a1) { ; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvtss2sd: +; SKX: # BB#0: +; SKX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] +; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50] +; SKX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [5:1.00] +; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvtss2sd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00] @@ -1335,6 +1476,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvttpd2dq: +; SKX: # BB#0: +; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00] +; SKX-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvttpd2dq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00] @@ -1401,6 +1549,13 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvttps2dq: +; SKX: # BB#0: +; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [4:0.50] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvttps2dq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [8:1.00] @@ -1464,6 +1619,13 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) { ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvttsd2si: +; SKX: # BB#0: +; SKX-NEXT: vcvttsd2si %xmm0, %ecx # sched: [6:1.00] +; SKX-NEXT: vcvttsd2si (%rdi), %eax # sched: [6:1.00] +; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvttsd2si: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcvttsd2si (%rdi), %eax # sched: [8:1.00] @@ -1527,6 +1689,13 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) { ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_cvttsd2siq: +; SKX: # BB#0: +; SKX-NEXT: vcvttsd2si %xmm0, %rcx # sched: [6:1.00] +; SKX-NEXT: vcvttsd2si (%rdi), %rax # sched: [6:1.00] +; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_cvttsd2siq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vcvttsd2si (%rdi), %rax # sched: [8:1.00] @@ -1584,6 +1753,12 @@ define <2 x double> @test_divpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKYLAKE-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [14:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_divpd: +; SKX: # BB#0: +; SKX-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:1.00] +; SKX-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [14:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_divpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [19:19.00] @@ -1638,6 +1813,12 @@ define double @test_divsd(double %a0, double %a1, double *%a2) { ; SKYLAKE-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [14:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_divsd: +; SKX: # BB#0: +; SKX-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:1.00] +; SKX-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [14:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_divsd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [19:19.00] @@ -1692,6 +1873,11 @@ define void @test_lfence() { ; SKYLAKE-NEXT: lfence # sched: [2:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_lfence: +; SKX: # BB#0: +; SKX-NEXT: lfence # sched: [2:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_lfence: ; BTVER2: # BB#0: ; BTVER2-NEXT: lfence # sched: [1:1.00] @@ -1743,6 +1929,11 @@ define void @test_mfence() { ; SKYLAKE-NEXT: mfence # sched: [2:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_mfence: +; SKX: # BB#0: +; SKX-NEXT: mfence # sched: [2:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_mfence: ; BTVER2: # BB#0: ; BTVER2-NEXT: mfence # sched: [1:1.00] @@ -1792,6 +1983,11 @@ define void @test_maskmovdqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) { ; SKYLAKE-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_maskmovdqu: +; SKX: # BB#0: +; SKX-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_maskmovdqu: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] @@ -1843,6 +2039,12 @@ define <2 x double> @test_maxpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKYLAKE-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_maxpd: +; SKX: # BB#0: +; SKX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SKX-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_maxpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -1898,6 +2100,12 @@ define <2 x double> @test_maxsd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKYLAKE-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_maxsd: +; SKX: # BB#0: +; SKX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_maxsd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -1953,6 +2161,12 @@ define <2 x double> @test_minpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKYLAKE-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_minpd: +; SKX: # BB#0: +; SKX-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SKX-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_minpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -2008,6 +2222,12 @@ define <2 x double> @test_minsd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKYLAKE-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_minsd: +; SKX: # BB#0: +; SKX-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_minsd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -2069,6 +2289,13 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movapd: +; SKX: # BB#0: +; SKX-NEXT: vmovapd (%rdi), %xmm0 # sched: [1:0.50] +; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movapd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovapd (%rdi), %xmm0 # sched: [5:1.00] @@ -2131,6 +2358,13 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) { ; SKYLAKE-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movdqa: +; SKX: # BB#0: +; SKX-NEXT: vmovdqa (%rdi), %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movdqa: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovdqa (%rdi), %xmm0 # sched: [5:1.00] @@ -2193,6 +2427,13 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) { ; SKYLAKE-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movdqu: +; SKX: # BB#0: +; SKX-NEXT: vmovdqu (%rdi), %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movdqu: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovdqu (%rdi), %xmm0 # sched: [5:1.00] @@ -2273,6 +2514,16 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; SKYLAKE-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movd: +; SKX: # BB#0: +; SKX-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] +; SKX-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [1:0.50] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; SKX-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] +; SKX-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00] @@ -2364,6 +2615,16 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) { ; SKYLAKE-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movd_64: +; SKX: # BB#0: +; SKX-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] +; SKX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [1:0.50] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; SKX-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] +; SKX-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movd_64: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00] @@ -2437,6 +2698,13 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; SKYLAKE-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movhpd: +; SKX: # BB#0: +; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movhpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] @@ -2502,6 +2770,13 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; SKYLAKE-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movlpd: +; SKX: # BB#0: +; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movlpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] @@ -2557,6 +2832,11 @@ define i32 @test_movmskpd(<2 x double> %a0) { ; SKYLAKE-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movmskpd: +; SKX: # BB#0: +; SKX-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movmskpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovmskpd %xmm0, %eax # sched: [1:0.50] @@ -2610,6 +2890,12 @@ define void @test_movntdqa(<2 x i64> %a0, <2 x i64> *%a1) { ; SKYLAKE-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movntdqa: +; SKX: # BB#0: +; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movntdqa: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] @@ -2663,6 +2949,12 @@ define void @test_movntpd(<2 x double> %a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movntpd: +; SKX: # BB#0: +; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movntpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] @@ -2722,6 +3014,13 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) { ; SKYLAKE-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movq_mem: +; SKX: # BB#0: +; SKX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [1:0.50] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movq_mem: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00] @@ -2782,6 +3081,12 @@ define <2 x i64> @test_movq_reg(<2 x i64> %a0, <2 x i64> %a1) { ; SKYLAKE-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movq_reg: +; SKX: # BB#0: +; SKX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50] +; SKX-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movq_reg: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50] @@ -2841,6 +3146,13 @@ define void @test_movsd_mem(double* %a0, double* %a1) { ; SKYLAKE-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movsd_mem: +; SKX: # BB#0: +; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [1:0.50] +; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movsd_mem: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00] @@ -2898,6 +3210,11 @@ define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) { ; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movsd_reg: +; SKX: # BB#0: +; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movsd_reg: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50] @@ -2954,6 +3271,13 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movupd: +; SKX: # BB#0: +; SKX-NEXT: vmovupd (%rdi), %xmm0 # sched: [1:0.50] +; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movupd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovupd (%rdi), %xmm0 # sched: [5:1.00] @@ -3010,6 +3334,12 @@ define <2 x double> @test_mulpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKYLAKE-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_mulpd: +; SKX: # BB#0: +; SKX-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_mulpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -3064,6 +3394,12 @@ define double @test_mulsd(double %a0, double %a1, double *%a2) { ; SKYLAKE-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_mulsd: +; SKX: # BB#0: +; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_mulsd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -3124,6 +3460,13 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_orpd: +; SKX: # BB#0: +; SKX-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_orpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -3189,6 +3532,12 @@ define <8 x i16> @test_packssdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_packssdw: +; SKX: # BB#0: +; SKX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_packssdw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -3249,6 +3598,12 @@ define <16 x i8> @test_packsswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_packsswb: +; SKX: # BB#0: +; SKX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_packsswb: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -3309,6 +3664,12 @@ define <16 x i8> @test_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_packuswb: +; SKX: # BB#0: +; SKX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_packuswb: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -3369,6 +3730,12 @@ define <16 x i8> @test_paddb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_paddb: +; SKX: # BB#0: +; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_paddb: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -3427,6 +3794,12 @@ define <4 x i32> @test_paddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_paddd: +; SKX: # BB#0: +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_paddd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -3481,6 +3854,12 @@ define <2 x i64> @test_paddq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_paddq: +; SKX: # BB#0: +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_paddq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -3539,6 +3918,12 @@ define <16 x i8> @test_paddsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_paddsb: +; SKX: # BB#0: +; SKX-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_paddsb: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -3598,6 +3983,12 @@ define <8 x i16> @test_paddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_paddsw: +; SKX: # BB#0: +; SKX-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_paddsw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -3657,6 +4048,12 @@ define <16 x i8> @test_paddusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_paddusb: +; SKX: # BB#0: +; SKX-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_paddusb: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -3716,6 +4113,12 @@ define <8 x i16> @test_paddusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_paddusw: +; SKX: # BB#0: +; SKX-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_paddusw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -3775,6 +4178,12 @@ define <8 x i16> @test_paddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_paddw: +; SKX: # BB#0: +; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_paddw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -3835,6 +4244,13 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pand: +; SKX: # BB#0: +; SKX-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pand: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -3904,6 +4320,13 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pandn: +; SKX: # BB#0: +; SKX-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [1:0.50] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pandn: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -3967,6 +4390,12 @@ define <16 x i8> @test_pavgb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pavgb: +; SKX: # BB#0: +; SKX-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pavgb: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -4035,6 +4464,12 @@ define <8 x i16> @test_pavgw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pavgw: +; SKX: # BB#0: +; SKX-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pavgw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -4107,6 +4542,14 @@ define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pcmpeqb: +; SKX: # BB#0: +; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 +; SKX-NEXT: vpcmpeqb (%rdi), %xmm0, %k1 +; SKX-NEXT: korw %k1, %k0, %k0 +; SKX-NEXT: vpmovm2b %k0, %xmm0 +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pcmpeqb: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -4173,6 +4616,14 @@ define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pcmpeqd: +; SKX: # BB#0: +; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 +; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %k1 +; SKX-NEXT: korw %k1, %k0, %k0 +; SKX-NEXT: vpmovm2d %k0, %xmm0 +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pcmpeqd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -4239,6 +4690,14 @@ define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pcmpeqw: +; SKX: # BB#0: +; SKX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 +; SKX-NEXT: vpcmpeqw (%rdi), %xmm0, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: vpmovm2w %k0, %xmm0 +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pcmpeqw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -4306,6 +4765,14 @@ define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pcmpgtb: +; SKX: # BB#0: +; SKX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 +; SKX-NEXT: vpcmpgtb (%rdi), %xmm0, %k1 +; SKX-NEXT: korw %k1, %k0, %k0 +; SKX-NEXT: vpmovm2b %k0, %xmm0 +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pcmpgtb: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -4373,6 +4840,14 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pcmpgtd: +; SKX: # BB#0: +; SKX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 +; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %k1 +; SKX-NEXT: korw %k1, %k0, %k0 +; SKX-NEXT: vpmovm2d %k0, %xmm0 +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pcmpgtd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -4440,6 +4915,14 @@ define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pcmpgtw: +; SKX: # BB#0: +; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 +; SKX-NEXT: vpcmpgtw (%rdi), %xmm0, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: vpmovm2w %k0, %xmm0 +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pcmpgtw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -4498,6 +4981,12 @@ define i16 @test_pextrw(<8 x i16> %a0) { ; SKYLAKE-NEXT: # kill: %AX %AX %EAX ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pextrw: +; SKX: # BB#0: +; SKX-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] +; SKX-NEXT: # kill: %AX %AX %EAX +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pextrw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpextrw $6, %xmm0, %eax # sched: [1:0.50] @@ -4554,6 +5043,12 @@ define <8 x i16> @test_pinsrw(<8 x i16> %a0, i16 %a1, i16 *%a2) { ; SKYLAKE-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pinsrw: +; SKX: # BB#0: +; SKX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] +; SKX-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pinsrw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:0.50] @@ -4616,6 +5111,12 @@ define <4 x i32> @test_pmaddwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmaddwd: +; SKX: # BB#0: +; SKX-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmaddwd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -4676,6 +5177,12 @@ define <8 x i16> @test_pmaxsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmaxsw: +; SKX: # BB#0: +; SKX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmaxsw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -4735,6 +5242,12 @@ define <16 x i8> @test_pmaxub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmaxub: +; SKX: # BB#0: +; SKX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmaxub: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -4794,6 +5307,12 @@ define <8 x i16> @test_pminsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pminsw: +; SKX: # BB#0: +; SKX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pminsw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -4853,6 +5372,12 @@ define <16 x i8> @test_pminub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pminub: +; SKX: # BB#0: +; SKX-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pminub: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -4904,6 +5429,11 @@ define i32 @test_pmovmskb(<16 x i8> %a0) { ; SKYLAKE-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmovmskb: +; SKX: # BB#0: +; SKX-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmovmskb: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmovmskb %xmm0, %eax # sched: [1:0.50] @@ -4955,6 +5485,12 @@ define <8 x i16> @test_pmulhuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmulhuw: +; SKX: # BB#0: +; SKX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmulhuw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -5010,6 +5546,12 @@ define <8 x i16> @test_pmulhw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmulhw: +; SKX: # BB#0: +; SKX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmulhw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -5065,6 +5607,12 @@ define <8 x i16> @test_pmullw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmullw: +; SKX: # BB#0: +; SKX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmullw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -5127,6 +5675,12 @@ define <2 x i64> @test_pmuludq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmuludq: +; SKX: # BB#0: +; SKX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmuludq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -5189,6 +5743,13 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_por: +; SKX: # BB#0: +; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_por: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -5254,6 +5815,12 @@ define <2 x i64> @test_psadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psadbw: +; SKX: # BB#0: +; SKX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SKX-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_psadbw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -5318,6 +5885,13 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pshufd: +; SKX: # BB#0: +; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] +; SKX-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [1:1.00] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pshufd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00] @@ -5383,6 +5957,13 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) { ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pshufhw: +; SKX: # BB#0: +; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] +; SKX-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [1:1.00] +; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pshufhw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00] @@ -5448,6 +6029,13 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) { ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pshuflw: +; SKX: # BB#0: +; SKX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] +; SKX-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [1:1.00] +; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pshuflw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00] @@ -5511,6 +6099,13 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pslld: +; SKX: # BB#0: +; SKX-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; SKX-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pslld: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -5570,6 +6165,11 @@ define <4 x i32> @test_pslldq(<4 x i32> %a0) { ; SKYLAKE-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pslldq: +; SKX: # BB#0: +; SKX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pslldq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] @@ -5626,6 +6226,13 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psllq: +; SKX: # BB#0: +; SKX-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; SKX-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_psllq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -5691,6 +6298,13 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psllw: +; SKX: # BB#0: +; SKX-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; SKX-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_psllw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -5756,6 +6370,13 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psrad: +; SKX: # BB#0: +; SKX-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; SKX-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_psrad: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -5821,6 +6442,13 @@ define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psraw: +; SKX: # BB#0: +; SKX-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; SKX-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_psraw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -5886,6 +6514,13 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psrld: +; SKX: # BB#0: +; SKX-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; SKX-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_psrld: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -5945,6 +6580,11 @@ define <4 x i32> @test_psrldq(<4 x i32> %a0) { ; SKYLAKE-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psrldq: +; SKX: # BB#0: +; SKX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_psrldq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] @@ -6001,6 +6641,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psrlq: +; SKX: # BB#0: +; SKX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; SKX-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_psrlq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -6066,6 +6713,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psrlw: +; SKX: # BB#0: +; SKX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; SKX-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_psrlw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -6129,6 +6783,12 @@ define <16 x i8> @test_psubb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psubb: +; SKX: # BB#0: +; SKX-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_psubb: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -6187,6 +6847,12 @@ define <4 x i32> @test_psubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psubd: +; SKX: # BB#0: +; SKX-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_psubd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -6241,6 +6907,12 @@ define <2 x i64> @test_psubq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psubq: +; SKX: # BB#0: +; SKX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_psubq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -6299,6 +6971,12 @@ define <16 x i8> @test_psubsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psubsb: +; SKX: # BB#0: +; SKX-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_psubsb: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -6358,6 +7036,12 @@ define <8 x i16> @test_psubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psubsw: +; SKX: # BB#0: +; SKX-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_psubsw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -6417,6 +7101,12 @@ define <16 x i8> @test_psubusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psubusb: +; SKX: # BB#0: +; SKX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_psubusb: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -6476,6 +7166,12 @@ define <8 x i16> @test_psubusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psubusw: +; SKX: # BB#0: +; SKX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_psubusw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -6535,6 +7231,12 @@ define <8 x i16> @test_psubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psubw: +; SKX: # BB#0: +; SKX-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_psubw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -6593,6 +7295,12 @@ define <16 x i8> @test_punpckhbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_punpckhbw: +; SKX: # BB#0: +; SKX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] +; SKX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_punpckhbw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] @@ -6655,6 +7363,13 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_punpckhdq: +; SKX: # BB#0: +; SKX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; SKX-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [1:1.00] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_punpckhdq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] @@ -6718,6 +7433,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_punpckhqdq: +; SKX: # BB#0: +; SKX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] +; SKX-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_punpckhqdq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] @@ -6779,6 +7501,12 @@ define <8 x i16> @test_punpckhwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_punpckhwd: +; SKX: # BB#0: +; SKX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] +; SKX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_punpckhwd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] @@ -6837,6 +7565,12 @@ define <16 x i8> @test_punpcklbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_punpcklbw: +; SKX: # BB#0: +; SKX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] +; SKX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_punpcklbw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] @@ -6899,6 +7633,13 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_punpckldq: +; SKX: # BB#0: +; SKX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] +; SKX-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [1:1.00] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_punpckldq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] @@ -6962,6 +7703,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_punpcklqdq: +; SKX: # BB#0: +; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_punpcklqdq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] @@ -7023,6 +7771,12 @@ define <8 x i16> @test_punpcklwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_punpcklwd: +; SKX: # BB#0: +; SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_punpcklwd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] @@ -7083,6 +7837,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pxor: +; SKX: # BB#0: +; SKX-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pxor: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -7146,6 +7907,13 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_shufpd: +; SKX: # BB#0: +; SKX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] +; SKX-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [1:1.00] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_shufpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50] @@ -7210,6 +7978,13 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_sqrtpd: +; SKX: # BB#0: +; SKX-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [18:1.00] +; SKX-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [18:1.00] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_sqrtpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [26:21.00] @@ -7282,6 +8057,14 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_sqrtsd: +; SKX: # BB#0: +; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:1.00] +; SKX-NEXT: vmovapd (%rdi), %xmm1 # sched: [1:0.50] +; SKX-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [18:1.00] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_sqrtsd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovapd (%rdi), %xmm1 # sched: [5:1.00] @@ -7342,6 +8125,12 @@ define <2 x double> @test_subpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKYLAKE-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_subpd: +; SKX: # BB#0: +; SKX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_subpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -7396,6 +8185,12 @@ define double @test_subsd(double %a0, double %a1, double *%a2) { ; SKYLAKE-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_subsd: +; SKX: # BB#0: +; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_subsd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -7498,6 +8293,20 @@ define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) ; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_ucomisd: +; SKX: # BB#0: +; SKX-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] +; SKX-NEXT: setnp %al # sched: [1:1.00] +; SKX-NEXT: sete %cl # sched: [1:1.00] +; SKX-NEXT: andb %al, %cl # sched: [1:0.25] +; SKX-NEXT: vucomisd (%rdi), %xmm0 # sched: [8:1.00] +; SKX-NEXT: setnp %al # sched: [1:1.00] +; SKX-NEXT: sete %dl # sched: [1:1.00] +; SKX-NEXT: andb %al, %dl # sched: [1:0.25] +; SKX-NEXT: orb %cl, %dl # sched: [1:0.25] +; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_ucomisd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] @@ -7576,6 +8385,13 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_unpckhpd: +; SKX: # BB#0: +; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] +; SKX-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_unpckhpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] @@ -7645,6 +8461,13 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_unpcklpd: +; SKX: # BB#0: +; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; SKX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [1:1.00] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_unpcklpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] @@ -7708,6 +8531,13 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_xorpd: +; SKX: # BB#0: +; SKX-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_xorpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] diff --git a/test/CodeGen/X86/sse3-schedule.ll b/test/CodeGen/X86/sse3-schedule.ll index e8bd6fe5c60..2d592c7587c 100644 --- a/test/CodeGen/X86/sse3-schedule.ll +++ b/test/CodeGen/X86/sse3-schedule.ll @@ -6,6 +6,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 @@ -46,6 +47,12 @@ define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SKYLAKE-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_addsubpd: +; SKX: # BB#0: +; SKX-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_addsubpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -101,6 +108,12 @@ define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SKYLAKE-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_addsubps: +; SKX: # BB#0: +; SKX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_addsubps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -156,6 +169,12 @@ define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; SKYLAKE-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [6:2.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_haddpd: +; SKX: # BB#0: +; SKX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00] +; SKX-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [6:2.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_haddpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -211,6 +230,12 @@ define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; SKYLAKE-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [6:2.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_haddps: +; SKX: # BB#0: +; SKX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00] +; SKX-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [6:2.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_haddps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -266,6 +291,12 @@ define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; SKYLAKE-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [6:2.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_hsubpd: +; SKX: # BB#0: +; SKX-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00] +; SKX-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [6:2.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_hsubpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -321,6 +352,12 @@ define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; SKYLAKE-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [6:2.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_hsubps: +; SKX: # BB#0: +; SKX-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00] +; SKX-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [6:2.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_hsubps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -372,6 +409,11 @@ define <16 x i8> @test_lddqu(i8* %a0) { ; SKYLAKE-NEXT: vlddqu (%rdi), %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_lddqu: +; SKX: # BB#0: +; SKX-NEXT: vlddqu (%rdi), %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_lddqu: ; BTVER2: # BB#0: ; BTVER2-NEXT: vlddqu (%rdi), %xmm0 # sched: [5:1.00] @@ -429,6 +471,13 @@ define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) { ; SKYLAKE-NEXT: monitor # sched: [100:0.25] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_monitor: +; SKX: # BB#0: +; SKX-NEXT: leaq (%rdi), %rax # sched: [1:0.50] +; SKX-NEXT: movl %esi, %ecx # sched: [1:0.25] +; SKX-NEXT: monitor # sched: [100:0.25] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_monitor: ; BTVER2: # BB#0: ; BTVER2-NEXT: leaq (%rdi), %rax # sched: [1:0.50] @@ -491,6 +540,13 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movddup: +; SKX: # BB#0: +; SKX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] +; SKX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [1:0.50] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movddup: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:1.00] @@ -555,6 +611,13 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movshdup: +; SKX: # BB#0: +; SKX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] +; SKX-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [1:0.50] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movshdup: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [5:1.00] @@ -619,6 +682,13 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movsldup: +; SKX: # BB#0: +; SKX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] +; SKX-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [1:0.50] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movsldup: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [5:1.00] @@ -682,6 +752,13 @@ define void @test_mwait(i32 %a0, i32 %a1) { ; SKYLAKE-NEXT: mwait # sched: [20:2.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_mwait: +; SKX: # BB#0: +; SKX-NEXT: movl %edi, %ecx # sched: [1:0.25] +; SKX-NEXT: movl %esi, %eax # sched: [1:0.25] +; SKX-NEXT: mwait # sched: [20:2.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_mwait: ; BTVER2: # BB#0: ; BTVER2-NEXT: movl %edi, %ecx # sched: [1:0.17] diff --git a/test/CodeGen/X86/sse41-schedule.ll b/test/CodeGen/X86/sse41-schedule.ll index 824b688eb98..f5b9a883cf3 100644 --- a/test/CodeGen/X86/sse41-schedule.ll +++ b/test/CodeGen/X86/sse41-schedule.ll @@ -5,6 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 @@ -44,6 +45,14 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; SKYLAKE-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_blendpd: +; SKX: # BB#0: +; SKX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:1.00] +; SKX-NEXT: vmovapd (%rdi), %xmm2 # sched: [1:0.50] +; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm2[1] sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_blendpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] @@ -95,6 +104,12 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_blendps: +; SKX: # BB#0: +; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] +; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_blendps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] @@ -149,6 +164,12 @@ define <2 x double> @test_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SKYLAKE-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:0.67] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_blendvpd: +; SKX: # BB#0: +; SKX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] +; SKX-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:0.67] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_blendvpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -204,6 +225,12 @@ define <4 x float> @test_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SKYLAKE-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:0.67] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_blendvps: +; SKX: # BB#0: +; SKX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] +; SKX-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:0.67] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_blendvps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -253,6 +280,12 @@ define <2 x double> @test_dppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKYLAKE-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_dppd: +; SKX: # BB#0: +; SKX-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] +; SKX-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_dppd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -302,6 +335,12 @@ define <4 x float> @test_dpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2 ; SKYLAKE-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [13:1.33] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_dpps: +; SKX: # BB#0: +; SKX-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [13:1.33] +; SKX-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [13:1.33] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_dpps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -351,6 +390,12 @@ define <4 x float> @test_insertps(<4 x float> %a0, <4 x float> %a1, float *%a2) ; SKYLAKE-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_insertps: +; SKX: # BB#0: +; SKX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] +; SKX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_insertps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50] @@ -395,6 +440,11 @@ define <2 x i64> @test_movntdqa(i8* %a0) { ; SKYLAKE-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_movntdqa: +; SKX: # BB#0: +; SKX-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_movntdqa: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [5:1.00] @@ -440,6 +490,12 @@ define <8 x i16> @test_mpsadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [4:2.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_mpsadbw: +; SKX: # BB#0: +; SKX-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [4:2.00] +; SKX-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [4:2.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_mpsadbw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [3:2.00] @@ -490,6 +546,12 @@ define <8 x i16> @test_packusdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_packusdw: +; SKX: # BB#0: +; SKX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_packusdw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -546,6 +608,12 @@ define <16 x i8> @test_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2, <16 ; SKYLAKE-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:0.67] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pblendvb: +; SKX: # BB#0: +; SKX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] +; SKX-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:0.67] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pblendvb: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -595,6 +663,12 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pblendw: +; SKX: # BB#0: +; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] +; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pblendw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] @@ -643,6 +717,14 @@ define <2 x i64> @test_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pcmpeqq: +; SKX: # BB#0: +; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 +; SKX-NEXT: vpmovm2q %k0, %xmm0 +; SKX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0 +; SKX-NEXT: vpmovm2q %k0, %xmm0 +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pcmpeqq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -693,6 +775,12 @@ define i32 @test_pextrb(<16 x i8> %a0, i8 *%a1) { ; SKYLAKE-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pextrb: +; SKX: # BB#0: +; SKX-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00] +; SKX-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pextrb: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpextrb $3, %xmm0, %eax # sched: [1:0.50] @@ -742,6 +830,12 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) { ; SKYLAKE-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pextrd: +; SKX: # BB#0: +; SKX-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00] +; SKX-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pextrd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpextrd $3, %xmm0, %eax # sched: [1:0.50] @@ -790,6 +884,12 @@ define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) { ; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pextrq: +; SKX: # BB#0: +; SKX-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00] +; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pextrq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpextrq $1, %xmm0, %rax # sched: [1:0.50] @@ -838,6 +938,12 @@ define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) { ; SKYLAKE-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pextrw: +; SKX: # BB#0: +; SKX-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00] +; SKX-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pextrw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpextrw $3, %xmm0, %eax # sched: [1:0.50] @@ -887,6 +993,12 @@ define <8 x i16> @test_phminposuw(<8 x i16> *%a0) { ; SKYLAKE-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:0.33] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_phminposuw: +; SKX: # BB#0: +; SKX-NEXT: vphminposuw (%rdi), %xmm0 # sched: [4:0.50] +; SKX-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_phminposuw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vphminposuw (%rdi), %xmm0 # sched: [7:1.00] @@ -936,6 +1048,12 @@ define <16 x i8> @test_pinsrb(<16 x i8> %a0, i8 %a1, i8 *%a2) { ; SKYLAKE-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pinsrb: +; SKX: # BB#0: +; SKX-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] +; SKX-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pinsrb: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.50] @@ -984,6 +1102,12 @@ define <4 x i32> @test_pinsrd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; SKYLAKE-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pinsrd: +; SKX: # BB#0: +; SKX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] +; SKX-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pinsrd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.50] @@ -1037,6 +1161,13 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pinsrq: +; SKX: # BB#0: +; SKX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00] +; SKX-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [1:1.00] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pinsrq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00] @@ -1088,6 +1219,12 @@ define <16 x i8> @test_pmaxsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmaxsb: +; SKX: # BB#0: +; SKX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmaxsb: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -1137,6 +1274,12 @@ define <4 x i32> @test_pmaxsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmaxsd: +; SKX: # BB#0: +; SKX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmaxsd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -1186,6 +1329,12 @@ define <4 x i32> @test_pmaxud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmaxud: +; SKX: # BB#0: +; SKX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmaxud: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -1235,6 +1384,12 @@ define <8 x i16> @test_pmaxuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmaxuw: +; SKX: # BB#0: +; SKX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmaxuw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -1284,6 +1439,12 @@ define <16 x i8> @test_pminsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pminsb: +; SKX: # BB#0: +; SKX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pminsb: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -1333,6 +1494,12 @@ define <4 x i32> @test_pminsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pminsd: +; SKX: # BB#0: +; SKX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pminsd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -1382,6 +1549,12 @@ define <4 x i32> @test_pminud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pminud: +; SKX: # BB#0: +; SKX-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pminud: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -1431,6 +1604,12 @@ define <8 x i16> @test_pminuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pminuw: +; SKX: # BB#0: +; SKX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pminuw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -1486,6 +1665,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmovsxbw: +; SKX: # BB#0: +; SKX-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [1:1.00] +; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmovsxbw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00] @@ -1544,6 +1730,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmovsxbd: +; SKX: # BB#0: +; SKX-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [1:1.00] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmovsxbd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00] @@ -1602,6 +1795,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmovsxbq: +; SKX: # BB#0: +; SKX-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [1:1.00] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmovsxbq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00] @@ -1660,6 +1860,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmovsxdq: +; SKX: # BB#0: +; SKX-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [1:1.00] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmovsxdq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00] @@ -1718,6 +1925,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmovsxwd: +; SKX: # BB#0: +; SKX-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [1:1.00] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmovsxwd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00] @@ -1776,6 +1990,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmovsxwq: +; SKX: # BB#0: +; SKX-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [1:1.00] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmovsxwq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00] @@ -1834,6 +2055,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmovzxbw: +; SKX: # BB#0: +; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] +; SKX-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [1:1.00] +; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmovzxbw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] @@ -1892,6 +2120,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmovzxbd: +; SKX: # BB#0: +; SKX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] +; SKX-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [1:1.00] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmovzxbd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] @@ -1950,6 +2185,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmovzxbq: +; SKX: # BB#0: +; SKX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] +; SKX-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmovzxbq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] @@ -2008,6 +2250,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmovzxdq: +; SKX: # BB#0: +; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] +; SKX-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [1:1.00] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmovzxdq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00] @@ -2066,6 +2315,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmovzxwd: +; SKX: # BB#0: +; SKX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] +; SKX-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [1:1.00] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmovzxwd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] @@ -2124,6 +2380,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmovzxwq: +; SKX: # BB#0: +; SKX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] +; SKX-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [1:1.00] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmovzxwq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] @@ -2176,6 +2439,12 @@ define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmuldq: +; SKX: # BB#0: +; SKX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmuldq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -2226,6 +2495,12 @@ define <4 x i32> @test_pmulld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [8:0.67] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmulld: +; SKX: # BB#0: +; SKX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [8:0.67] +; SKX-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [8:0.67] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmulld: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -2294,6 +2569,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: movzbl %cl, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_ptest: +; SKX: # BB#0: +; SKX-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00] +; SKX-NEXT: setb %al # sched: [1:1.00] +; SKX-NEXT: vptest (%rdi), %xmm0 # sched: [3:1.00] +; SKX-NEXT: setb %cl # sched: [1:1.00] +; SKX-NEXT: andb %al, %cl # sched: [1:0.25] +; SKX-NEXT: movzbl %cl, %eax # sched: [1:0.25] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_ptest: ; BTVER2: # BB#0: ; BTVER2-NEXT: vptest %xmm1, %xmm0 # sched: [1:0.50] @@ -2358,6 +2643,13 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_roundpd: +; SKX: # BB#0: +; SKX-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:0.67] +; SKX-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [8:0.67] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_roundpd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [8:1.00] @@ -2416,6 +2708,13 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_roundps: +; SKX: # BB#0: +; SKX-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:0.67] +; SKX-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [8:0.67] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_roundps: ; BTVER2: # BB#0: ; BTVER2-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [8:1.00] @@ -2475,6 +2774,13 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_roundsd: +; SKX: # BB#0: +; SKX-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67] +; SKX-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] +; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_roundsd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] @@ -2534,6 +2840,13 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_roundss: +; SKX: # BB#0: +; SKX-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67] +; SKX-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] +; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_roundss: ; BTVER2: # BB#0: ; BTVER2-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] diff --git a/test/CodeGen/X86/sse42-schedule.ll b/test/CodeGen/X86/sse42-schedule.ll index 7b9471c7048..681ab92f185 100644 --- a/test/CodeGen/X86/sse42-schedule.ll +++ b/test/CodeGen/X86/sse42-schedule.ll @@ -5,6 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 @@ -44,6 +45,13 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) { ; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: crc32_32_8: +; SKX: # BB#0: +; SKX-NEXT: crc32b %sil, %edi # sched: [3:1.00] +; SKX-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] +; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: crc32_32_8: ; BTVER2: # BB#0: ; BTVER2-NEXT: crc32b %sil, %edi # sched: [3:1.00] @@ -100,6 +108,13 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) { ; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: crc32_32_16: +; SKX: # BB#0: +; SKX-NEXT: crc32w %si, %edi # sched: [3:1.00] +; SKX-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] +; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: crc32_32_16: ; BTVER2: # BB#0: ; BTVER2-NEXT: crc32w %si, %edi # sched: [3:1.00] @@ -156,6 +171,13 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) { ; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: crc32_32_32: +; SKX: # BB#0: +; SKX-NEXT: crc32l %esi, %edi # sched: [3:1.00] +; SKX-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] +; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: crc32_32_32: ; BTVER2: # BB#0: ; BTVER2-NEXT: crc32l %esi, %edi # sched: [3:1.00] @@ -212,6 +234,13 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind { ; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: crc32_64_8: +; SKX: # BB#0: +; SKX-NEXT: crc32b %sil, %edi # sched: [3:1.00] +; SKX-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] +; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: crc32_64_8: ; BTVER2: # BB#0: ; BTVER2-NEXT: crc32b %sil, %edi # sched: [3:1.00] @@ -268,6 +297,13 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) { ; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: crc32_64_64: +; SKX: # BB#0: +; SKX-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] +; SKX-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] +; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: crc32_64_64: ; BTVER2: # BB#0: ; BTVER2-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] @@ -354,6 +390,19 @@ define i32 @test_pcmpestri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pcmpestri: +; SKX: # BB#0: +; SKX-NEXT: movl $7, %eax # sched: [1:0.25] +; SKX-NEXT: movl $7, %edx # sched: [1:0.25] +; SKX-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00] +; SKX-NEXT: movl %ecx, %esi # sched: [1:0.25] +; SKX-NEXT: movl $7, %eax # sched: [1:0.25] +; SKX-NEXT: movl $7, %edx # sched: [1:0.25] +; SKX-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [18:4.00] +; SKX-NEXT: # kill: %ECX %ECX %RCX +; SKX-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pcmpestri: ; BTVER2: # BB#0: ; BTVER2-NEXT: movl $7, %eax # sched: [1:0.17] @@ -438,6 +487,16 @@ define <16 x i8> @test_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [19:4.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pcmpestrm: +; SKX: # BB#0: +; SKX-NEXT: movl $7, %eax # sched: [1:0.25] +; SKX-NEXT: movl $7, %edx # sched: [1:0.25] +; SKX-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00] +; SKX-NEXT: movl $7, %eax # sched: [1:0.25] +; SKX-NEXT: movl $7, %edx # sched: [1:0.25] +; SKX-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [19:4.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pcmpestrm: ; BTVER2: # BB#0: ; BTVER2-NEXT: movl $7, %eax # sched: [1:0.17] @@ -510,6 +569,15 @@ define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pcmpistri: +; SKX: # BB#0: +; SKX-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [10:3.00] +; SKX-NEXT: movl %ecx, %eax # sched: [1:0.25] +; SKX-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [10:3.00] +; SKX-NEXT: # kill: %ECX %ECX %RCX +; SKX-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pcmpistri: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [6:1.00] @@ -566,6 +634,12 @@ define <16 x i8> @test_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [10:3.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pcmpistrm: +; SKX: # BB#0: +; SKX-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00] +; SKX-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [10:3.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pcmpistrm: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [7:1.00] @@ -615,6 +689,14 @@ define <2 x i64> @test_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pcmpgtq: +; SKX: # BB#0: +; SKX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 +; SKX-NEXT: vpmovm2q %k0, %xmm0 +; SKX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 +; SKX-NEXT: vpmovm2q %k0, %xmm0 +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pcmpgtq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -665,6 +747,12 @@ define <2 x i64> @test_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pclmulqdq: +; SKX: # BB#0: +; SKX-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [6:1.00] +; SKX-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pclmulqdq: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] diff --git a/test/CodeGen/X86/ssse3-schedule.ll b/test/CodeGen/X86/ssse3-schedule.ll index 1ddcaf813b5..ad57f19901d 100644 --- a/test/CodeGen/X86/ssse3-schedule.ll +++ b/test/CodeGen/X86/ssse3-schedule.ll @@ -6,6 +6,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 @@ -53,6 +54,13 @@ define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) { ; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pabsb: +; SKX: # BB#0: +; SKX-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpabsb (%rdi), %xmm1 # sched: [1:0.50] +; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pabsb: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpabsb (%rdi), %xmm1 # sched: [6:1.00] @@ -118,6 +126,13 @@ define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) { ; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pabsd: +; SKX: # BB#0: +; SKX-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpabsd (%rdi), %xmm1 # sched: [1:0.50] +; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pabsd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpabsd (%rdi), %xmm1 # sched: [6:1.00] @@ -183,6 +198,13 @@ define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) { ; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pabsw: +; SKX: # BB#0: +; SKX-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpabsw (%rdi), %xmm1 # sched: [1:0.50] +; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pabsw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpabsw (%rdi), %xmm1 # sched: [6:1.00] @@ -246,6 +268,12 @@ define <8 x i16> @test_palignr(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_palignr: +; SKX: # BB#0: +; SKX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] +; SKX-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_palignr: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] @@ -300,6 +328,12 @@ define <4 x i32> @test_phaddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_phaddd: +; SKX: # BB#0: +; SKX-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; SKX-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_phaddd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -355,6 +389,12 @@ define <8 x i16> @test_phaddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_phaddsw: +; SKX: # BB#0: +; SKX-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; SKX-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_phaddsw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -410,6 +450,12 @@ define <8 x i16> @test_phaddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_phaddw: +; SKX: # BB#0: +; SKX-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; SKX-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_phaddw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -465,6 +511,12 @@ define <4 x i32> @test_phsubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_phsubd: +; SKX: # BB#0: +; SKX-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; SKX-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_phsubd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -520,6 +572,12 @@ define <8 x i16> @test_phsubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_phsubsw: +; SKX: # BB#0: +; SKX-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; SKX-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_phsubsw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -575,6 +633,12 @@ define <8 x i16> @test_phsubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_phsubw: +; SKX: # BB#0: +; SKX-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; SKX-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_phsubw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -630,6 +694,12 @@ define <8 x i16> @test_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmaddubsw: +; SKX: # BB#0: +; SKX-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmaddubsw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -686,6 +756,12 @@ define <8 x i16> @test_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pmulhrsw: +; SKX: # BB#0: +; SKX-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pmulhrsw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -741,6 +817,12 @@ define <16 x i8> @test_pshufb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_pshufb: +; SKX: # BB#0: +; SKX-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_pshufb: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -800,6 +882,12 @@ define <16 x i8> @test_psignb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psignb: +; SKX: # BB#0: +; SKX-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_psignb: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -859,6 +947,12 @@ define <4 x i32> @test_psignd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psignd: +; SKX: # BB#0: +; SKX-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_psignd: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -918,6 +1012,12 @@ define <8 x i16> @test_psignw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; +; SKX-LABEL: test_psignw: +; SKX: # BB#0: +; SKX-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [2:1.00] +; ; BTVER2-LABEL: test_psignw: ; BTVER2: # BB#0: ; BTVER2-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -- 2.50.1