; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; SKYLAKE-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_addpd:
+; SKX: # BB#0:
+; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_addpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; SKYLAKE-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_addps:
+; SKX: # BB#0:
+; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_addps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; SKYLAKE-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_addsubpd:
+; SKX: # BB#0:
+; SKX-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_addsubpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; SKYLAKE-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_addsubps:
+; SKX: # BB#0:
+; SKX-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_addsubps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_andnotpd:
+; SKX: # BB#0:
+; SKX-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_andnotpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_andnotps:
+; SKX: # BB#0:
+; SKX-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_andnotps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_andpd:
+; SKX: # BB#0:
+; SKX-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_andpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_andps:
+; SKX: # BB#0:
+; SKX-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_andps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_blendpd:
+; SKX: # BB#0:
+; SKX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50]
+; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_blendpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50]
; SKYLAKE-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_blendps:
+; SKX: # BB#0:
+; SKX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50]
+; SKX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_blendps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50]
; SKYLAKE-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:0.67]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_blendvpd:
+; SKX: # BB#0:
+; SKX-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
+; SKX-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:0.67]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_blendvpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
; SKYLAKE-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:0.67]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_blendvps:
+; SKX: # BB#0:
+; SKX-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
+; SKX-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:0.67]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_blendvps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
; SKYLAKE-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_broadcastf128:
+; SKX: # BB#0:
+; SKX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_broadcastf128:
; BTVER2: # BB#0:
; BTVER2-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [6:1.00]
; SKYLAKE-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_broadcastsd_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_broadcastsd_ymm:
; BTVER2: # BB#0:
; BTVER2-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [6:1.00]
; SKYLAKE-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_broadcastss:
+; SKX: # BB#0:
+; SKX-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_broadcastss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [5:1.00]
; SKYLAKE-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_broadcastss_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_broadcastss_ymm:
; BTVER2: # BB#0:
; BTVER2-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [6:1.00]
; SKYLAKE-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cmppd:
+; SKX: # BB#0:
+; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
+; SKX-NEXT: vcmpeqpd (%rdi), %ymm0, %k1
+; SKX-NEXT: vpmovm2q %k0, %ymm0
+; SKX-NEXT: vpmovm2q %k1, %ymm1
+; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cmppd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
; SKYLAKE-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cmpps:
+; SKX: # BB#0:
+; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %k0
+; SKX-NEXT: vcmpeqps (%rdi), %ymm0, %k1
+; SKX-NEXT: vpmovm2d %k0, %ymm0
+; SKX-NEXT: vpmovm2d %k1, %ymm1
+; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cmpps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvtdq2pd:
+; SKX: # BB#0:
+; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
+; SKX-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [7:1.00]
+; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvtdq2pd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [8:1.00]
; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvtdq2ps:
+; SKX: # BB#0:
+; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [4:0.50]
+; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvtdq2ps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [8:1.00]
; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvtpd2dq:
+; SKX: # BB#0:
+; SKX-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00]
+; SKX-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
+; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvtpd2dq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvtpd2ps:
+; SKX: # BB#0:
+; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00]
+; SKX-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00]
+; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvtpd2ps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00]
; SKYLAKE-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvtps2dq:
+; SKX: # BB#0:
+; SKX-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [4:0.50]
+; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvtps2dq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [8:1.00]
; SKYLAKE-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [14:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_divpd:
+; SKX: # BB#0:
+; SKX-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [14:1.00]
+; SKX-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [14:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_divpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [38:38.00]
; SKYLAKE-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_divps:
+; SKX: # BB#0:
+; SKX-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [11:1.00]
+; SKX-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_divps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [38:38.00]
; SKYLAKE-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [13:1.33]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_dpps:
+; SKX: # BB#0:
+; SKX-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [13:1.33]
+; SKX-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [13:1.33]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_dpps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_extractf128:
+; SKX: # BB#0:
+; SKX-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00]
+; SKX-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: vzeroupper # sched: [4:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_extractf128:
; BTVER2: # BB#0:
; BTVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_haddpd:
+; SKX: # BB#0:
+; SKX-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
+; SKX-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_haddpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; SKYLAKE-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_haddps:
+; SKX: # BB#0:
+; SKX-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
+; SKX-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_haddps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; SKYLAKE-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_hsubpd:
+; SKX: # BB#0:
+; SKX-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
+; SKX-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_hsubpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; SKYLAKE-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_hsubps:
+; SKX: # BB#0:
+; SKX-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
+; SKX-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_hsubps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_insertf128:
+; SKX: # BB#0:
+; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
+; SKX-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_insertf128:
; BTVER2: # BB#0:
; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:0.50]
; SKYLAKE-NEXT: vlddqu (%rdi), %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_lddqu:
+; SKX: # BB#0:
+; SKX-NEXT: vlddqu (%rdi), %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_lddqu:
; BTVER2: # BB#0:
; BTVER2-NEXT: vlddqu (%rdi), %ymm0 # sched: [5:1.00]
; SKYLAKE-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_maskmovpd:
+; SKX: # BB#0:
+; SKX-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [1:0.50]
+; SKX-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_maskmovpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2
; SKYLAKE-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_maskmovpd_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [1:0.50]
+; SKX-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_maskmovpd_ymm:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2
; SKYLAKE-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_maskmovps:
+; SKX: # BB#0:
+; SKX-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [1:0.50]
+; SKX-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_maskmovps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2
; SKYLAKE-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_maskmovps_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [1:0.50]
+; SKX-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_maskmovps_ymm:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2
; SKYLAKE-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_maxpd:
+; SKX: # BB#0:
+; SKX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; SKX-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_maxpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_maxps:
+; SKX: # BB#0:
+; SKX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; SKX-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_maxps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_minpd:
+; SKX: # BB#0:
+; SKX-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; SKX-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_minpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_minps:
+; SKX: # BB#0:
+; SKX-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; SKX-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_minps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movapd:
+; SKX: # BB#0:
+; SKX-NEXT: vmovapd (%rdi), %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movapd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovapd (%rdi), %ymm0 # sched: [5:1.00]
; SKYLAKE-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movaps:
+; SKX: # BB#0:
+; SKX-NEXT: vmovaps (%rdi), %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movaps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovaps (%rdi), %ymm0 # sched: [5:1.00]
; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movddup:
+; SKX: # BB#0:
+; SKX-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
+; SKX-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [1:0.50]
+; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movddup:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [5:1.00]
; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movmskpd:
+; SKX: # BB#0:
+; SKX-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00]
+; SKX-NEXT: vzeroupper # sched: [4:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movmskpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovmskpd %ymm0, %eax # sched: [1:0.50]
; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movmskps:
+; SKX: # BB#0:
+; SKX-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00]
+; SKX-NEXT: vzeroupper # sched: [4:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movmskps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovmskps %ymm0, %eax # sched: [1:0.50]
; SKYLAKE-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movntpd:
+; SKX: # BB#0:
+; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movntpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:2.00]
; SKYLAKE-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movntps:
+; SKX: # BB#0:
+; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movntps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:2.00]
; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movshdup:
+; SKX: # BB#0:
+; SKX-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
+; SKX-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [1:0.50]
+; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movshdup:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [5:1.00]
; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movsldup:
+; SKX: # BB#0:
+; SKX-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
+; SKX-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [1:0.50]
+; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movsldup:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [5:1.00]
; SKYLAKE-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movupd:
+; SKX: # BB#0:
+; SKX-NEXT: vmovupd (%rdi), %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movupd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovupd (%rdi), %ymm0 # sched: [5:1.00]
; SKYLAKE-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movups:
+; SKX: # BB#0:
+; SKX-NEXT: vmovups (%rdi), %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movups:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovups (%rdi), %ymm0 # sched: [5:1.00]
; SKYLAKE-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_mulpd:
+; SKX: # BB#0:
+; SKX-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_mulpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:4.00]
; SKYLAKE-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_mulps:
+; SKX: # BB#0:
+; SKX-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_mulps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: orpd:
+; SKX: # BB#0:
+; SKX-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: orpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_orps:
+; SKX: # BB#0:
+; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_orps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_perm2f128:
+; SKX: # BB#0:
+; SKX-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
+; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00]
+; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_perm2f128:
; BTVER2: # BB#0:
; BTVER2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:0.50]
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_permilpd:
+; SKX: # BB#0:
+; SKX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
+; SKX-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [1:1.00]
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_permilpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [6:1.00]
; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_permilpd_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
+; SKX-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [1:1.00]
+; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_permilpd_ymm:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [6:1.00]
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_permilps:
+; SKX: # BB#0:
+; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
+; SKX-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [1:1.00]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_permilps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00]
; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_permilps_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
+; SKX-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [1:1.00]
+; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_permilps_ymm:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [6:1.00]
; SKYLAKE-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_permilvarpd:
+; SKX: # BB#0:
+; SKX-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_permilvarpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_permilvarpd_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_permilvarpd_ymm:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_permilvarps:
+; SKX: # BB#0:
+; SKX-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_permilvarps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_permilvarps_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_permilvarps_ymm:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_rcpps:
+; SKX: # BB#0:
+; SKX-NEXT: vrcp14ps %ymm0, %ymm0
+; SKX-NEXT: vrcp14ps (%rdi), %ymm1
+; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_rcpps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vrcpps (%rdi), %ymm1 # sched: [7:2.00]
; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_roundpd:
+; SKX: # BB#0:
+; SKX-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [8:0.67]
+; SKX-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [8:0.67]
+; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_roundpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [8:1.00]
; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_roundps:
+; SKX: # BB#0:
+; SKX-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [8:0.67]
+; SKX-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [8:0.67]
+; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_roundps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [8:1.00]
; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_rsqrtps:
+; SKX: # BB#0:
+; SKX-NEXT: vrsqrt14ps %ymm0, %ymm0
+; SKX-NEXT: vrsqrt14ps (%rdi), %ymm1
+; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_rsqrtps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [7:2.00]
; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_shufpd:
+; SKX: # BB#0:
+; SKX-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
+; SKX-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [1:1.00]
+; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_shufpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:0.50]
; SKYLAKE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_shufps:
+; SKX: # BB#0:
+; SKX-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
+; SKX-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_shufps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:0.50]
; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_sqrtpd:
+; SKX: # BB#0:
+; SKX-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [18:1.00]
+; SKX-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [18:1.00]
+; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_sqrtpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [59:54.00]
; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_sqrtps:
+; SKX: # BB#0:
+; SKX-NEXT: vsqrtps %ymm0, %ymm0 # sched: [12:1.00]
+; SKX-NEXT: vsqrtps (%rdi), %ymm1 # sched: [12:1.00]
+; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_sqrtps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vsqrtps (%rdi), %ymm1 # sched: [47:42.00]
; SKYLAKE-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_subpd:
+; SKX: # BB#0:
+; SKX-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_subpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; SKYLAKE-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_subps:
+; SKX: # BB#0:
+; SKX-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_subps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_testpd:
+; SKX: # BB#0:
+; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25]
+; SKX-NEXT: vtestpd %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-NEXT: setb %al # sched: [1:1.00]
+; SKX-NEXT: vtestpd (%rdi), %xmm0 # sched: [2:1.00]
+; SKX-NEXT: adcl $0, %eax # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_testpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: xorl %eax, %eax # sched: [1:0.50]
; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_testpd_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25]
+; SKX-NEXT: vtestpd %ymm1, %ymm0 # sched: [2:1.00]
+; SKX-NEXT: setb %al # sched: [1:1.00]
+; SKX-NEXT: vtestpd (%rdi), %ymm0 # sched: [2:1.00]
+; SKX-NEXT: adcl $0, %eax # sched: [1:1.00]
+; SKX-NEXT: vzeroupper # sched: [4:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_testpd_ymm:
; BTVER2: # BB#0:
; BTVER2-NEXT: xorl %eax, %eax # sched: [1:0.50]
; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_testps:
+; SKX: # BB#0:
+; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25]
+; SKX-NEXT: vtestps %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-NEXT: setb %al # sched: [1:1.00]
+; SKX-NEXT: vtestps (%rdi), %xmm0 # sched: [2:1.00]
+; SKX-NEXT: adcl $0, %eax # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_testps:
; BTVER2: # BB#0:
; BTVER2-NEXT: xorl %eax, %eax # sched: [1:0.50]
; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_testps_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25]
+; SKX-NEXT: vtestps %ymm1, %ymm0 # sched: [2:1.00]
+; SKX-NEXT: setb %al # sched: [1:1.00]
+; SKX-NEXT: vtestps (%rdi), %ymm0 # sched: [2:1.00]
+; SKX-NEXT: adcl $0, %eax # sched: [1:1.00]
+; SKX-NEXT: vzeroupper # sched: [4:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_testps_ymm:
; BTVER2: # BB#0:
; BTVER2-NEXT: xorl %eax, %eax # sched: [1:0.50]
; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_unpckhpd:
+; SKX: # BB#0:
+; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
+; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [1:1.00]
+; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_unpckhpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:0.50]
; SKYLAKE-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_unpckhps:
+; SKX: # BB#0:
+; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
+; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_unpckhps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:0.50]
; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_unpcklpd:
+; SKX: # BB#0:
+; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
+; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [1:1.00]
+; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_unpcklpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:0.50]
; SKYLAKE-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_unpcklps:
+; SKX: # BB#0:
+; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
+; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_unpcklps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:0.50]
; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_xorpd:
+; SKX: # BB#0:
+; SKX-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_xorpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_xorps:
+; SKX: # BB#0:
+; SKX-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_xorps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vzeroall # sched: [16:4.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_zeroall:
+; SKX: # BB#0:
+; SKX-NEXT: vzeroall # sched: [16:4.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_zeroall:
; BTVER2: # BB#0:
; BTVER2-NEXT: vzeroall # sched: [90:?]
; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_zeroupper:
+; SKX: # BB#0:
+; SKX-NEXT: vzeroupper # sched: [4:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_zeroupper:
; BTVER2: # BB#0:
; BTVER2-NEXT: vzeroupper # sched: [46:?]
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
define <8 x i32> @test_broadcasti128(<8 x i32> %a0, <4 x i32> *%a1) {
; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_broadcasti128:
+; SKX: # BB#0:
+; SKX-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [1:0.50]
+; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_broadcasti128:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [8:0.50]
; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_broadcastsd_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
+; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_broadcastsd_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [100:0.25]
; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_broadcastss:
+; SKX: # BB#0:
+; SKX-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_broadcastss:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_broadcastss_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
+; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_broadcastss_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [100:0.25]
; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_extracti128:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.50]
+; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00]
+; SKX-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: vzeroupper # sched: [4:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_extracti128:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.25]
; SKYLAKE-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_gatherdpd:
+; SKX: # BB#0:
+; SKX-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_gatherdpd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
; SKYLAKE-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [20:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_gatherdpd_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [20:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_gatherdpd_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [100:?]
; SKYLAKE-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_gatherdps:
+; SKX: # BB#0:
+; SKX-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_gatherdps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
; SKYLAKE-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [20:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_gatherdps_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [20:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_gatherdps_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [100:?]
; SKYLAKE-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_gatherqpd:
+; SKX: # BB#0:
+; SKX-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_gatherqpd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
; SKYLAKE-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [20:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_gatherqpd_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [20:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_gatherqpd_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [100:?]
; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_gatherqps:
+; SKX: # BB#0:
+; SKX-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_gatherqps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_gatherqps_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [20:1.00]
+; SKX-NEXT: vzeroupper # sched: [4:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_gatherqps_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [100:?]
; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_inserti128:
+; SKX: # BB#0:
+; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
+; SKX-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_inserti128:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [2:0.25]
; SKYLAKE-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movntdqa:
+; SKX: # BB#0:
+; SKX-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_movntdqa:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [4:2.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_mpsadbw:
+; SKX: # BB#0:
+; SKX-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [4:2.00]
+; SKX-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [4:2.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_mpsadbw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [100:?]
; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pabsb:
+; SKX: # BB#0:
+; SKX-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpabsb (%rdi), %ymm1 # sched: [1:0.50]
+; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pabsb:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pabsd:
+; SKX: # BB#0:
+; SKX-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpabsd (%rdi), %ymm1 # sched: [1:0.50]
+; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pabsd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pabsw:
+; SKX: # BB#0:
+; SKX-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpabsw (%rdi), %ymm1 # sched: [1:0.50]
+; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pabsw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
; SKYLAKE-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_packssdw:
+; SKX: # BB#0:
+; SKX-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_packssdw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_packsswb:
+; SKX: # BB#0:
+; SKX-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_packsswb:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_packusdw:
+; SKX: # BB#0:
+; SKX-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_packusdw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_packuswb:
+; SKX: # BB#0:
+; SKX-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_packuswb:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_paddb:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_paddb:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_paddd:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_paddd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_paddq:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_paddq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_paddsb:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_paddsb:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_paddsw:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_paddsw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_paddusb:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_paddusb:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_paddusw:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_paddusw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_paddw:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_paddw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_palignr:
+; SKX: # BB#0:
+; SKX-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
+; SKX-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_palignr:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:0.25]
; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pand:
+; SKX: # BB#0:
+; SKX-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pand:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pandn:
+; SKX: # BB#0:
+; SKX-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [1:0.50]
+; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pandn:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pavgb:
+; SKX: # BB#0:
+; SKX-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pavgb:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pavgw:
+; SKX: # BB#0:
+; SKX-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pavgw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pblendd:
+; SKX: # BB#0:
+; SKX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.50]
+; SKX-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [1:0.50]
+; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pblendd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.50]
; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pblendd_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50]
+; SKX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [1:0.50]
+; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pblendd_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50]
; SKYLAKE-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [2:0.67]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pblendvb:
+; SKX: # BB#0:
+; SKX-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
+; SKX-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [2:0.67]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pblendvb:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SKYLAKE-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pblendw:
+; SKX: # BB#0:
+; SKX-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
+; SKX-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pblendw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [2:0.33]
; SKYLAKE-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pbroadcastb:
+; SKX: # BB#0:
+; SKX-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
+; SKX-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [1:1.00]
+; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pbroadcastb:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [8:1.00]
; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pbroadcastb_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
+; SKX-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [1:1.00]
+; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pbroadcastb_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:2.00]
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pbroadcastd:
+; SKX: # BB#0:
+; SKX-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm0
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pbroadcastd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [8:0.50]
; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pbroadcastd_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
+; SKX-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm0
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pbroadcastd_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [8:0.50]
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pbroadcastq:
+; SKX: # BB#0:
+; SKX-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpaddq (%rdi){1to2}, %xmm0, %xmm0
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pbroadcastq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [8:0.50]
; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pbroadcastq_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
+; SKX-NEXT: vpaddq (%rdi){1to4}, %ymm0, %ymm0
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pbroadcastq_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [8:0.50]
; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pbroadcastw:
+; SKX: # BB#0:
+; SKX-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
+; SKX-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [1:1.00]
+; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pbroadcastw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [8:1.00]
; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pbroadcastw_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
+; SKX-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [1:1.00]
+; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pbroadcastw_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:2.00]
; SKYLAKE-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pcmpeqb:
+; SKX: # BB#0:
+; SKX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0
+; SKX-NEXT: vpmovm2b %k0, %ymm0
+; SKX-NEXT: vpcmpeqb (%rdi), %ymm0, %k0
+; SKX-NEXT: vpmovm2b %k0, %ymm0
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pcmpeqb:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pcmpeqd:
+; SKX: # BB#0:
+; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
+; SKX-NEXT: vpmovm2d %k0, %ymm0
+; SKX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
+; SKX-NEXT: vpmovm2d %k0, %ymm0
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pcmpeqd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pcmpeqq:
+; SKX: # BB#0:
+; SKX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
+; SKX-NEXT: vpmovm2q %k0, %ymm0
+; SKX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
+; SKX-NEXT: vpmovm2q %k0, %ymm0
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pcmpeqq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pcmpeqw:
+; SKX: # BB#0:
+; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
+; SKX-NEXT: vpmovm2w %k0, %ymm0
+; SKX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0
+; SKX-NEXT: vpmovm2w %k0, %ymm0
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pcmpeqw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pcmpgtb:
+; SKX: # BB#0:
+; SKX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0
+; SKX-NEXT: vpmovm2b %k0, %ymm0
+; SKX-NEXT: vpcmpgtb (%rdi), %ymm0, %k0
+; SKX-NEXT: vpmovm2b %k0, %ymm0
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pcmpgtb:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pcmpgtd:
+; SKX: # BB#0:
+; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
+; SKX-NEXT: vpmovm2d %k0, %ymm0
+; SKX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
+; SKX-NEXT: vpmovm2d %k0, %ymm0
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pcmpgtd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pcmpgtq:
+; SKX: # BB#0:
+; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
+; SKX-NEXT: vpmovm2q %k0, %ymm0
+; SKX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
+; SKX-NEXT: vpmovm2q %k0, %ymm0
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pcmpgtq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pcmpgtw:
+; SKX: # BB#0:
+; SKX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
+; SKX-NEXT: vpmovm2w %k0, %ymm0
+; SKX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0
+; SKX-NEXT: vpmovm2w %k0, %ymm0
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pcmpgtw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_perm2i128:
+; SKX: # BB#0:
+; SKX-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
+; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00]
+; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_perm2i128:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [2:0.25]
; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_permd:
+; SKX: # BB#0:
+; SKX-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
+; SKX-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
+; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_permd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [2:0.25]
; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_permpd:
+; SKX: # BB#0:
+; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
+; SKX-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [3:1.00]
+; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_permpd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [107:0.50]
; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_permps:
+; SKX: # BB#0:
+; SKX-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
+; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
+; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_permps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [100:0.25]
; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_permq:
+; SKX: # BB#0:
+; SKX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
+; SKX-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [3:1.00]
+; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_permq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:0.50]
; SKYLAKE-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pgatherdd:
+; SKX: # BB#0:
+; SKX-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pgatherdd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
; SKYLAKE-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [20:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pgatherdd_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [20:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pgatherdd_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [100:?]
; SKYLAKE-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pgatherdq:
+; SKX: # BB#0:
+; SKX-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pgatherdq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
; SKYLAKE-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [20:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pgatherdq_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [20:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pgatherdq_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [100:?]
; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pgatherqd:
+; SKX: # BB#0:
+; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pgatherqd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pgatherqd_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [20:1.00]
+; SKX-NEXT: vzeroupper # sched: [4:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pgatherqd_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [100:?]
; SKYLAKE-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pgatherqq:
+; SKX: # BB#0:
+; SKX-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pgatherqq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
; SKYLAKE-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [20:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pgatherqq_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [20:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pgatherqq_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [100:?]
; SKYLAKE-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_phaddd:
+; SKX: # BB#0:
+; SKX-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; SKX-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_phaddd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [100:?]
; SKYLAKE-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_phaddsw:
+; SKX: # BB#0:
+; SKX-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; SKX-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_phaddsw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [100:?]
; SKYLAKE-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_phaddw:
+; SKX: # BB#0:
+; SKX-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; SKX-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_phaddw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [100:?]
; SKYLAKE-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_phsubd:
+; SKX: # BB#0:
+; SKX-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; SKX-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_phsubd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [100:?]
; SKYLAKE-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_phsubsw:
+; SKX: # BB#0:
+; SKX-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; SKX-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_phsubsw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [100:?]
; SKYLAKE-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_phsubw:
+; SKX: # BB#0:
+; SKX-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; SKX-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_phsubw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [100:?]
; SKYLAKE-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmaddubsw:
+; SKX: # BB#0:
+; SKX-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmaddubsw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
; SKYLAKE-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmaddwd:
+; SKX: # BB#0:
+; SKX-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmaddwd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
; SKYLAKE-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmaskmovd:
+; SKX: # BB#0:
+; SKX-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [1:0.50]
+; SKX-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmaskmovd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [100:?]
; SKYLAKE-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmaskmovd_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [1:0.50]
+; SKX-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmaskmovd_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [100:?]
; SKYLAKE-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmaskmovq:
+; SKX: # BB#0:
+; SKX-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [1:0.50]
+; SKX-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmaskmovq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
; SKYLAKE-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmaskmovq_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [1:0.50]
+; SKX-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmaskmovq_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:1.50]
; SKYLAKE-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmaxsb:
+; SKX: # BB#0:
+; SKX-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmaxsb:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmaxsd:
+; SKX: # BB#0:
+; SKX-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmaxsd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmaxsw:
+; SKX: # BB#0:
+; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmaxsw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmaxub:
+; SKX: # BB#0:
+; SKX-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmaxub:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmaxud:
+; SKX: # BB#0:
+; SKX-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmaxud:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmaxuw:
+; SKX: # BB#0:
+; SKX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmaxuw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pminsb:
+; SKX: # BB#0:
+; SKX-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pminsb:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pminsd:
+; SKX: # BB#0:
+; SKX-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pminsd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pminsw:
+; SKX: # BB#0:
+; SKX-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pminsw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pminub:
+; SKX: # BB#0:
+; SKX-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pminub:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pminud:
+; SKX: # BB#0:
+; SKX-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pminud:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pminuw:
+; SKX: # BB#0:
+; SKX-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pminuw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmovmskb:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00]
+; SKX-NEXT: vzeroupper # sched: [4:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmovmskb:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00]
; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmovsxbd:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
+; SKX-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [3:1.00]
+; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmovsxbd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:0.50]
; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmovsxbq:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
+; SKX-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [3:1.00]
+; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmovsxbq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:0.50]
; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmovsxbw:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
+; SKX-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [3:1.00]
+; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmovsxbw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:0.50]
; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmovsxdq:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
+; SKX-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [3:1.00]
+; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmovsxdq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:0.50]
; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmovsxwd:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
+; SKX-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [3:1.00]
+; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmovsxwd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:0.50]
; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmovsxwq:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
+; SKX-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [3:1.00]
+; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmovsxwq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:0.50]
; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmovzxbd:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
+; SKX-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [3:1.00]
+; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmovzxbd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:0.50]
; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmovzxbq:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
+; SKX-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
+; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmovzxbq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50]
; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmovzxbw:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
+; SKX-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [3:1.00]
+; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmovzxbw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:0.50]
; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmovzxdq:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
+; SKX-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [3:1.00]
+; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmovzxdq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50]
; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmovzxwd:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
+; SKX-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [3:1.00]
+; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmovzxwd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50]
; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmovzxwq:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
+; SKX-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [3:1.00]
+; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmovzxwq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50]
; SKYLAKE-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmuldq:
+; SKX: # BB#0:
+; SKX-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmuldq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
; SKYLAKE-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmulhrsw:
+; SKX: # BB#0:
+; SKX-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmulhrsw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
; SKYLAKE-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmulhuw:
+; SKX: # BB#0:
+; SKX-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmulhuw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
; SKYLAKE-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmulhw:
+; SKX: # BB#0:
+; SKX-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmulhw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
; SKYLAKE-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [8:0.67]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmulld:
+; SKX: # BB#0:
+; SKX-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [8:0.67]
+; SKX-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [8:0.67]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmulld:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
; SKYLAKE-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmullw:
+; SKX: # BB#0:
+; SKX-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmullw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
; SKYLAKE-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmuludq:
+; SKX: # BB#0:
+; SKX-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pmuludq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_por:
+; SKX: # BB#0:
+; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_por:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psadbw:
+; SKX: # BB#0:
+; SKX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; SKX-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psadbw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
; SKYLAKE-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pshufb:
+; SKX: # BB#0:
+; SKX-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pshufb:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pshufd:
+; SKX: # BB#0:
+; SKX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
+; SKX-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [1:1.00]
+; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pshufd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:0.50]
; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pshufhw:
+; SKX: # BB#0:
+; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
+; SKX-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [1:1.00]
+; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pshufhw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:0.50]
; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pshuflw:
+; SKX: # BB#0:
+; SKX-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
+; SKX-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [1:1.00]
+; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pshuflw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:0.50]
; SKYLAKE-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psignb:
+; SKX: # BB#0:
+; SKX-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psignb:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psignd:
+; SKX: # BB#0:
+; SKX-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psignd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psignw:
+; SKX: # BB#0:
+; SKX-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psignw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pslld:
+; SKX: # BB#0:
+; SKX-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
+; SKX-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pslld:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
; SKYLAKE-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pslldq:
+; SKX: # BB#0:
+; SKX-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pslldq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [2:1.00]
; SKYLAKE-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psllq:
+; SKX: # BB#0:
+; SKX-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
+; SKX-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psllq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
; SKYLAKE-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psllvd:
+; SKX: # BB#0:
+; SKX-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psllvd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psllvd_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psllvd_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psllvq:
+; SKX: # BB#0:
+; SKX-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psllvq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psllvq_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psllvq_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psllw:
+; SKX: # BB#0:
+; SKX-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
+; SKX-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psllw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
; SKYLAKE-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psrad:
+; SKX: # BB#0:
+; SKX-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
+; SKX-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psrad:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
; SKYLAKE-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psravd:
+; SKX: # BB#0:
+; SKX-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psravd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psravd_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psravd_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psraw:
+; SKX: # BB#0:
+; SKX-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
+; SKX-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psraw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
; SKYLAKE-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psrld:
+; SKX: # BB#0:
+; SKX-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
+; SKX-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psrld:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
; SKYLAKE-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psrldq:
+; SKX: # BB#0:
+; SKX-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psrldq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [2:1.00]
; SKYLAKE-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psrlq:
+; SKX: # BB#0:
+; SKX-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
+; SKX-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psrlq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
; SKYLAKE-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psrlvd:
+; SKX: # BB#0:
+; SKX-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psrlvd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psrlvd_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psrlvd_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psrlvq:
+; SKX: # BB#0:
+; SKX-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psrlvq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psrlvq_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psrlvq_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psrlw:
+; SKX: # BB#0:
+; SKX-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
+; SKX-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psrlw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
; SKYLAKE-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psubb:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psubb:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psubd:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psubd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psubq:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psubq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psubsb:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psubsb:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psubsw:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psubsw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psubusb:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psubusb:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psubusw:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SKX-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psubusw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psubw:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_psubw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_punpckhbw:
+; SKX: # BB#0:
+; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
+; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_punpckhbw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:0.25]
; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_punpckhdq:
+; SKX: # BB#0:
+; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
+; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [1:1.00]
+; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:1.00]
+; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_punpckhdq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:0.25]
; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_punpckhqdq:
+; SKX: # BB#0:
+; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
+; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [1:1.00]
+; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_punpckhqdq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:0.25]
; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_punpckhwd:
+; SKX: # BB#0:
+; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
+; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_punpckhwd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:0.25]
; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_punpcklbw:
+; SKX: # BB#0:
+; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
+; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_punpcklbw:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:0.25]
; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_punpckldq:
+; SKX: # BB#0:
+; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
+; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00]
+; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:1.00]
+; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_punpckldq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:0.25]
; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_punpcklqdq:
+; SKX: # BB#0:
+; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
+; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [1:1.00]
+; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_punpcklqdq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:0.25]
; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_punpcklwd:
+; SKX: # BB#0:
+; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
+; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_punpcklwd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:0.25]
; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pxor:
+; SKX: # BB#0:
+; SKX-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_pxor:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
;
; KNL-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfmadd213pd:
+; SKX: # BB#0:
+; SKX-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfmadd213pd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfmadd213pd_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfmadd213pd_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
; KNL-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfmadd213ps:
+; SKX: # BB#0:
+; SKX-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfmadd213ps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfmadd213ps_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfmadd213ps_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0
; KNL-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfmadd213sd:
+; SKX: # BB#0:
+; SKX-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfmadd213sd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfmadd213ss:
+; SKX: # BB#0:
+; SKX-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfmadd213ss:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfmaddsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfmaddsubpd:
+; SKX: # BB#0:
+; SKX-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vfmaddsub213pd (%rdi), %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfmaddsubpd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfmaddsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfmaddsubpd_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vfmaddsub213pd (%rdi), %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfmaddsubpd_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0
; KNL-NEXT: vfmaddsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfmaddsubps:
+; SKX: # BB#0:
+; SKX-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vfmaddsub213ps (%rdi), %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfmaddsubps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfmaddsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfmaddsubps_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vfmaddsub213ps (%rdi), %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfmaddsubps_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0
; KNL-NEXT: vfmsubadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfmsubaddpd:
+; SKX: # BB#0:
+; SKX-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vfmsubadd213pd (%rdi), %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfmsubaddpd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfmsubadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfmsubaddpd_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vfmsubadd213pd (%rdi), %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfmsubaddpd_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0
; KNL-NEXT: vfmsubadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfmsubaddps:
+; SKX: # BB#0:
+; SKX-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vfmsubadd213ps (%rdi), %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfmsubaddps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfmsubadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfmsubaddps_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vfmsubadd213ps (%rdi), %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfmsubaddps_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0
; KNL-NEXT: vfmsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfmsub213pd:
+; SKX: # BB#0:
+; SKX-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vfmsub213pd (%rdi), %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfmsub213pd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfmsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfmsub213pd_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vfmsub213pd (%rdi), %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfmsub213pd_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0
; KNL-NEXT: vfmsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfmsub213ps:
+; SKX: # BB#0:
+; SKX-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vfmsub213ps (%rdi), %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfmsub213ps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfmsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfmsub213ps_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vfmsub213ps (%rdi), %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfmsub213ps_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
; KNL-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfmsub213sd:
+; SKX: # BB#0:
+; SKX-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfmsub213sd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfmsub213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfmsub213ss:
+; SKX: # BB#0:
+; SKX-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vfmsub213ss (%rdi), %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfmsub213ss:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfnmadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfnmadd213pd:
+; SKX: # BB#0:
+; SKX-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vfnmadd213pd (%rdi), %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfnmadd213pd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfnmadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfnmadd213pd_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vfnmadd213pd (%rdi), %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfnmadd213pd_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0
; KNL-NEXT: vfnmadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfnmadd213ps:
+; SKX: # BB#0:
+; SKX-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vfnmadd213ps (%rdi), %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfnmadd213ps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfnmadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfnmadd213ps_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vfnmadd213ps (%rdi), %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfnmadd213ps_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; KNL-NEXT: vfnmadd213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfnmadd213sd:
+; SKX: # BB#0:
+; SKX-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vfnmadd213sd (%rdi), %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfnmadd213sd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfnmadd213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfnmadd213ss:
+; SKX: # BB#0:
+; SKX-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vfnmadd213ss (%rdi), %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfnmadd213ss:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfnmsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfnmsub213pd:
+; SKX: # BB#0:
+; SKX-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vfnmsub213pd (%rdi), %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfnmsub213pd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfnmsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfnmsub213pd_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vfnmsub213pd (%rdi), %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfnmsub213pd_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0
; KNL-NEXT: vfnmsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfnmsub213ps:
+; SKX: # BB#0:
+; SKX-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vfnmsub213ps (%rdi), %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfnmsub213ps:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfnmsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfnmsub213ps_ymm:
+; SKX: # BB#0:
+; SKX-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: vfnmsub213ps (%rdi), %ymm1, %ymm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfnmsub213ps_ymm:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0
; KNL-NEXT: vfnmsub213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfnmsub213sd:
+; SKX: # BB#0:
+; SKX-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vfnmsub213sd (%rdi), %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfnmsub213sd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfnmsub213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_vfnmsub213ss:
+; SKX: # BB#0:
+; SKX-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vfnmsub213ss (%rdi), %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; ZNVER1-LABEL: test_vfnmsub213ss:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; SKYLAKE-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_addps:
+; SKX: # BB#0:
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_addps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_addss:
+; SKX: # BB#0:
+; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_addss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_andps:
+; SKX: # BB#0:
+; SKX-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_andps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_andnotps:
+; SKX: # BB#0:
+; SKX-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_andnotps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cmpps:
+; SKX: # BB#0:
+; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
+; SKX-NEXT: vcmpeqps (%rdi), %xmm0, %k1
+; SKX-NEXT: korw %k1, %k0, %k0
+; SKX-NEXT: vpmovm2d %k0, %xmm0
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cmpps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; SKYLAKE-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cmpss:
+; SKX: # BB#0:
+; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SKX-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cmpss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_comiss:
+; SKX: # BB#0:
+; SKX-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
+; SKX-NEXT: setnp %al # sched: [1:1.00]
+; SKX-NEXT: sete %cl # sched: [1:1.00]
+; SKX-NEXT: andb %al, %cl # sched: [1:0.25]
+; SKX-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00]
+; SKX-NEXT: setnp %al # sched: [1:1.00]
+; SKX-NEXT: sete %dl # sched: [1:1.00]
+; SKX-NEXT: andb %al, %dl # sched: [1:0.25]
+; SKX-NEXT: orb %cl, %dl # sched: [1:0.25]
+; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_comiss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvtsi2ss:
+; SKX: # BB#0:
+; SKX-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
+; SKX-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
+; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvtsi2ss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvtsi2ssq:
+; SKX: # BB#0:
+; SKX-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00]
+; SKX-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
+; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvtsi2ssq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvtss2si:
+; SKX: # BB#0:
+; SKX-NEXT: vcvtss2si %xmm0, %ecx # sched: [6:1.00]
+; SKX-NEXT: vcvtss2si (%rdi), %eax # sched: [6:1.00]
+; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvtss2si:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvtss2si (%rdi), %eax # sched: [8:1.00]
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvtss2siq:
+; SKX: # BB#0:
+; SKX-NEXT: vcvtss2si %xmm0, %rcx # sched: [6:1.00]
+; SKX-NEXT: vcvtss2si (%rdi), %rax # sched: [6:1.00]
+; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvtss2siq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvtss2si (%rdi), %rax # sched: [8:1.00]
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvttss2si:
+; SKX: # BB#0:
+; SKX-NEXT: vcvttss2si %xmm0, %ecx # sched: [7:1.00]
+; SKX-NEXT: vcvttss2si (%rdi), %eax # sched: [6:1.00]
+; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvttss2si:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvttss2si (%rdi), %eax # sched: [8:1.00]
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvttss2siq:
+; SKX: # BB#0:
+; SKX-NEXT: vcvttss2si %xmm0, %rcx # sched: [7:1.00]
+; SKX-NEXT: vcvttss2si (%rdi), %rax # sched: [6:1.00]
+; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvttss2siq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvttss2si (%rdi), %rax # sched: [8:1.00]
; SKYLAKE-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_divps:
+; SKX: # BB#0:
+; SKX-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
+; SKX-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_divps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
; SKYLAKE-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_divss:
+; SKX: # BB#0:
+; SKX-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
+; SKX-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_divss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
; SKYLAKE-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_ldmxcsr:
+; SKX: # BB#0:
+; SKX-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; SKX-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_ldmxcsr:
; BTVER2: # BB#0:
; BTVER2-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; SKYLAKE-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_maxps:
+; SKX: # BB#0:
+; SKX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SKX-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_maxps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_maxss:
+; SKX: # BB#0:
+; SKX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_maxss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_minps:
+; SKX: # BB#0:
+; SKX-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SKX-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_minps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_minss:
+; SKX: # BB#0:
+; SKX-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_minss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movaps:
+; SKX: # BB#0:
+; SKX-NEXT: vmovaps (%rdi), %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movaps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovaps (%rdi), %xmm0 # sched: [5:1.00]
; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movhlps:
+; SKX: # BB#0:
+; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movhlps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movhps:
+; SKX: # BB#0:
+; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movhps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movlhps:
+; SKX: # BB#0:
+; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movlhps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
; SKYLAKE-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movlps:
+; SKX: # BB#0:
+; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movlps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; SKYLAKE-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movmskps:
+; SKX: # BB#0:
+; SKX-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movmskps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovmskps %xmm0, %eax # sched: [1:0.50]
; SKYLAKE-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movntps:
+; SKX: # BB#0:
+; SKX-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movntps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movss_mem:
+; SKX: # BB#0:
+; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50]
+; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movss_mem:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movss_reg:
+; SKX: # BB#0:
+; SKX-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movss_reg:
; BTVER2: # BB#0:
; BTVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
; SKYLAKE-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movups:
+; SKX: # BB#0:
+; SKX-NEXT: vmovups (%rdi), %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movups:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovups (%rdi), %xmm0 # sched: [5:1.00]
; SKYLAKE-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_mulps:
+; SKX: # BB#0:
+; SKX-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_mulps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SKYLAKE-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_mulss:
+; SKX: # BB#0:
+; SKX-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_mulss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SKYLAKE-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_orps:
+; SKX: # BB#0:
+; SKX-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_orps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: prefetchnta (%rdi) # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_prefetchnta:
+; SKX: # BB#0:
+; SKX-NEXT: prefetchnta (%rdi) # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_prefetchnta:
; BTVER2: # BB#0:
; BTVER2-NEXT: prefetchnta (%rdi) # sched: [5:1.00]
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_rcpps:
+; SKX: # BB#0:
+; SKX-NEXT: vrcp14ps %xmm0, %xmm0
+; SKX-NEXT: vrcp14ps (%rdi), %xmm1
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_rcpps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vrcpps (%rdi), %xmm1 # sched: [7:1.00]
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_rcpss:
+; SKX: # BB#0:
+; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
+; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
+; SKX-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_rcpss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_rsqrtps:
+; SKX: # BB#0:
+; SKX-NEXT: vrsqrt14ps %xmm0, %xmm0
+; SKX-NEXT: vrsqrt14ps (%rdi), %xmm1
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_rsqrtps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [7:1.00]
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_rsqrtss:
+; SKX: # BB#0:
+; SKX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
+; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
+; SKX-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_rsqrtss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
; SKYLAKE-NEXT: sfence # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_sfence:
+; SKX: # BB#0:
+; SKX-NEXT: sfence # sched: [1:0.33]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_sfence:
; BTVER2: # BB#0:
; BTVER2-NEXT: sfence # sched: [1:1.00]
; SKYLAKE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_shufps:
+; SKX: # BB#0:
+; SKX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
+; SKX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_shufps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_sqrtps:
+; SKX: # BB#0:
+; SKX-NEXT: vsqrtps %xmm0, %xmm0 # sched: [12:1.00]
+; SKX-NEXT: vsqrtps (%rdi), %xmm1 # sched: [12:1.00]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_sqrtps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vsqrtps (%rdi), %xmm1 # sched: [26:21.00]
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_sqrtss:
+; SKX: # BB#0:
+; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:1.00]
+; SKX-NEXT: vmovaps (%rdi), %xmm1 # sched: [1:0.50]
+; SKX-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [12:1.00]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_sqrtss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovaps (%rdi), %xmm1 # sched: [5:1.00]
; SKYLAKE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_stmxcsr:
+; SKX: # BB#0:
+; SKX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; SKX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_stmxcsr:
; BTVER2: # BB#0:
; BTVER2-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; SKYLAKE-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_subps:
+; SKX: # BB#0:
+; SKX-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_subps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_subss:
+; SKX: # BB#0:
+; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_subss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_ucomiss:
+; SKX: # BB#0:
+; SKX-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
+; SKX-NEXT: setnp %al # sched: [1:1.00]
+; SKX-NEXT: sete %cl # sched: [1:1.00]
+; SKX-NEXT: andb %al, %cl # sched: [1:0.25]
+; SKX-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00]
+; SKX-NEXT: setnp %al # sched: [1:1.00]
+; SKX-NEXT: sete %dl # sched: [1:1.00]
+; SKX-NEXT: andb %al, %dl # sched: [1:0.25]
+; SKX-NEXT: orb %cl, %dl # sched: [1:0.25]
+; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_ucomiss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_unpckhps:
+; SKX: # BB#0:
+; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_unpckhps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
; SKYLAKE-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_unpcklps:
+; SKX: # BB#0:
+; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_unpcklps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
; SKYLAKE-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_xorps:
+; SKX: # BB#0:
+; SKX-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_xorps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; SKYLAKE-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_addpd:
+; SKX: # BB#0:
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_addpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_addsd:
+; SKX: # BB#0:
+; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_addsd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_andpd:
+; SKX: # BB#0:
+; SKX-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_andpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_andnotpd:
+; SKX: # BB#0:
+; SKX-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_andnotpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cmppd:
+; SKX: # BB#0:
+; SKX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
+; SKX-NEXT: vcmpeqpd (%rdi), %xmm0, %k1
+; SKX-NEXT: korw %k1, %k0, %k0
+; SKX-NEXT: vpmovm2q %k0, %xmm0
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cmppd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; SKYLAKE-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cmpsd:
+; SKX: # BB#0:
+; SKX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SKX-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cmpsd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_comisd:
+; SKX: # BB#0:
+; SKX-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00]
+; SKX-NEXT: setnp %al # sched: [1:1.00]
+; SKX-NEXT: sete %cl # sched: [1:1.00]
+; SKX-NEXT: andb %al, %cl # sched: [1:0.25]
+; SKX-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00]
+; SKX-NEXT: setnp %al # sched: [1:1.00]
+; SKX-NEXT: sete %dl # sched: [1:1.00]
+; SKX-NEXT: andb %al, %dl # sched: [1:0.25]
+; SKX-NEXT: orb %cl, %dl # sched: [1:0.25]
+; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_comisd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvtdq2pd:
+; SKX: # BB#0:
+; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00]
+; SKX-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [5:1.00]
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvtdq2pd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [8:1.00]
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvtdq2ps:
+; SKX: # BB#0:
+; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [4:0.50]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvtdq2ps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00]
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvtpd2dq:
+; SKX: # BB#0:
+; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00]
+; SKX-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
+; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvtpd2dq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvtpd2ps:
+; SKX: # BB#0:
+; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00]
+; SKX-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvtpd2ps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvtps2dq:
+; SKX: # BB#0:
+; SKX-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [4:0.50]
+; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvtps2dq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [8:1.00]
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvtps2pd:
+; SKX: # BB#0:
+; SKX-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00]
+; SKX-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [4:0.50]
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvtps2pd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [8:1.00]
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvtsd2si:
+; SKX: # BB#0:
+; SKX-NEXT: vcvtsd2si %xmm0, %ecx # sched: [6:1.00]
+; SKX-NEXT: vcvtsd2si (%rdi), %eax # sched: [6:1.00]
+; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvtsd2si:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvtsd2si (%rdi), %eax # sched: [8:1.00]
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvtsd2siq:
+; SKX: # BB#0:
+; SKX-NEXT: vcvtsd2si %xmm0, %rcx # sched: [6:1.00]
+; SKX-NEXT: vcvtsd2si (%rdi), %rax # sched: [6:1.00]
+; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvtsd2siq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvtsd2si (%rdi), %rax # sched: [8:1.00]
; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvtsd2ss:
+; SKX: # BB#0:
+; SKX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
+; SKX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [1:0.50]
+; SKX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
+; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvtsd2ss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvtsi2sd:
+; SKX: # BB#0:
+; SKX-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
+; SKX-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
+; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvtsi2sd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvtsi2sdq:
+; SKX: # BB#0:
+; SKX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
+; SKX-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
+; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvtsi2sdq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvtss2sd:
+; SKX: # BB#0:
+; SKX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
+; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
+; SKX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
+; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvtss2sd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvttpd2dq:
+; SKX: # BB#0:
+; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00]
+; SKX-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
+; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvttpd2dq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvttps2dq:
+; SKX: # BB#0:
+; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [4:0.50]
+; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvttps2dq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [8:1.00]
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvttsd2si:
+; SKX: # BB#0:
+; SKX-NEXT: vcvttsd2si %xmm0, %ecx # sched: [6:1.00]
+; SKX-NEXT: vcvttsd2si (%rdi), %eax # sched: [6:1.00]
+; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvttsd2si:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvttsd2si (%rdi), %eax # sched: [8:1.00]
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_cvttsd2siq:
+; SKX: # BB#0:
+; SKX-NEXT: vcvttsd2si %xmm0, %rcx # sched: [6:1.00]
+; SKX-NEXT: vcvttsd2si (%rdi), %rax # sched: [6:1.00]
+; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_cvttsd2siq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvttsd2si (%rdi), %rax # sched: [8:1.00]
; SKYLAKE-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_divpd:
+; SKX: # BB#0:
+; SKX-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
+; SKX-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_divpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
; SKYLAKE-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_divsd:
+; SKX: # BB#0:
+; SKX-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
+; SKX-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_divsd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
; SKYLAKE-NEXT: lfence # sched: [2:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_lfence:
+; SKX: # BB#0:
+; SKX-NEXT: lfence # sched: [2:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_lfence:
; BTVER2: # BB#0:
; BTVER2-NEXT: lfence # sched: [1:1.00]
; SKYLAKE-NEXT: mfence # sched: [2:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_mfence:
+; SKX: # BB#0:
+; SKX-NEXT: mfence # sched: [2:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_mfence:
; BTVER2: # BB#0:
; BTVER2-NEXT: mfence # sched: [1:1.00]
; SKYLAKE-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_maskmovdqu:
+; SKX: # BB#0:
+; SKX-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_maskmovdqu:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_maxpd:
+; SKX: # BB#0:
+; SKX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SKX-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_maxpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_maxsd:
+; SKX: # BB#0:
+; SKX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_maxsd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_minpd:
+; SKX: # BB#0:
+; SKX-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SKX-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_minpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_minsd:
+; SKX: # BB#0:
+; SKX-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_minsd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movapd:
+; SKX: # BB#0:
+; SKX-NEXT: vmovapd (%rdi), %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movapd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovapd (%rdi), %xmm0 # sched: [5:1.00]
; SKYLAKE-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movdqa:
+; SKX: # BB#0:
+; SKX-NEXT: vmovdqa (%rdi), %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movdqa:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovdqa (%rdi), %xmm0 # sched: [5:1.00]
; SKYLAKE-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movdqu:
+; SKX: # BB#0:
+; SKX-NEXT: vmovdqu (%rdi), %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movdqu:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovdqu (%rdi), %xmm0 # sched: [5:1.00]
; SKYLAKE-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movd:
+; SKX: # BB#0:
+; SKX-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00]
+; SKX-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [1:0.50]
+; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00]
+; SKX-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
; SKYLAKE-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movd_64:
+; SKX: # BB#0:
+; SKX-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00]
+; SKX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [1:0.50]
+; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vmovq %xmm0, %rax # sched: [2:1.00]
+; SKX-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movd_64:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00]
; SKYLAKE-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movhpd:
+; SKX: # BB#0:
+; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movhpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; SKYLAKE-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movlpd:
+; SKX: # BB#0:
+; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movlpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; SKYLAKE-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movmskpd:
+; SKX: # BB#0:
+; SKX-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movmskpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovmskpd %xmm0, %eax # sched: [1:0.50]
; SKYLAKE-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movntdqa:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movntdqa:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movntpd:
+; SKX: # BB#0:
+; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movntpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movq_mem:
+; SKX: # BB#0:
+; SKX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [1:0.50]
+; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movq_mem:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
; SKYLAKE-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movq_reg:
+; SKX: # BB#0:
+; SKX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50]
+; SKX-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movq_reg:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50]
; SKYLAKE-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movsd_mem:
+; SKX: # BB#0:
+; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [1:0.50]
+; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movsd_mem:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00]
; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movsd_reg:
+; SKX: # BB#0:
+; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movsd_reg:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50]
; SKYLAKE-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movupd:
+; SKX: # BB#0:
+; SKX-NEXT: vmovupd (%rdi), %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movupd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovupd (%rdi), %xmm0 # sched: [5:1.00]
; SKYLAKE-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_mulpd:
+; SKX: # BB#0:
+; SKX-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_mulpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SKYLAKE-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_mulsd:
+; SKX: # BB#0:
+; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_mulsd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_orpd:
+; SKX: # BB#0:
+; SKX-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_orpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_packssdw:
+; SKX: # BB#0:
+; SKX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_packssdw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_packsswb:
+; SKX: # BB#0:
+; SKX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_packsswb:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_packuswb:
+; SKX: # BB#0:
+; SKX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_packuswb:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_paddb:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_paddb:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_paddd:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_paddd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_paddq:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_paddq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_paddsb:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_paddsb:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_paddsw:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_paddsw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_paddusb:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_paddusb:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_paddusw:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_paddusw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_paddw:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_paddw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pand:
+; SKX: # BB#0:
+; SKX-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pand:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pandn:
+; SKX: # BB#0:
+; SKX-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pandn:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pavgb:
+; SKX: # BB#0:
+; SKX-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pavgb:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pavgw:
+; SKX: # BB#0:
+; SKX-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pavgw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pcmpeqb:
+; SKX: # BB#0:
+; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
+; SKX-NEXT: vpcmpeqb (%rdi), %xmm0, %k1
+; SKX-NEXT: korw %k1, %k0, %k0
+; SKX-NEXT: vpmovm2b %k0, %xmm0
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pcmpeqb:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pcmpeqd:
+; SKX: # BB#0:
+; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
+; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %k1
+; SKX-NEXT: korw %k1, %k0, %k0
+; SKX-NEXT: vpmovm2d %k0, %xmm0
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pcmpeqd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pcmpeqw:
+; SKX: # BB#0:
+; SKX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
+; SKX-NEXT: vpcmpeqw (%rdi), %xmm0, %k1
+; SKX-NEXT: korb %k1, %k0, %k0
+; SKX-NEXT: vpmovm2w %k0, %xmm0
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pcmpeqw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pcmpgtb:
+; SKX: # BB#0:
+; SKX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
+; SKX-NEXT: vpcmpgtb (%rdi), %xmm0, %k1
+; SKX-NEXT: korw %k1, %k0, %k0
+; SKX-NEXT: vpmovm2b %k0, %xmm0
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pcmpgtb:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pcmpgtd:
+; SKX: # BB#0:
+; SKX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
+; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %k1
+; SKX-NEXT: korw %k1, %k0, %k0
+; SKX-NEXT: vpmovm2d %k0, %xmm0
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pcmpgtd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pcmpgtw:
+; SKX: # BB#0:
+; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
+; SKX-NEXT: vpcmpgtw (%rdi), %xmm0, %k1
+; SKX-NEXT: korb %k1, %k0, %k0
+; SKX-NEXT: vpmovm2w %k0, %xmm0
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pcmpgtw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
; SKYLAKE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pextrw:
+; SKX: # BB#0:
+; SKX-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00]
+; SKX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pextrw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpextrw $6, %xmm0, %eax # sched: [1:0.50]
; SKYLAKE-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pinsrw:
+; SKX: # BB#0:
+; SKX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
+; SKX-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pinsrw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmaddwd:
+; SKX: # BB#0:
+; SKX-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmaddwd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SKYLAKE-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmaxsw:
+; SKX: # BB#0:
+; SKX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmaxsw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmaxub:
+; SKX: # BB#0:
+; SKX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmaxub:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pminsw:
+; SKX: # BB#0:
+; SKX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pminsw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pminub:
+; SKX: # BB#0:
+; SKX-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pminub:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmovmskb:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmovmskb:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmovmskb %xmm0, %eax # sched: [1:0.50]
; SKYLAKE-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmulhuw:
+; SKX: # BB#0:
+; SKX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmulhuw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SKYLAKE-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmulhw:
+; SKX: # BB#0:
+; SKX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmulhw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SKYLAKE-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmullw:
+; SKX: # BB#0:
+; SKX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmullw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SKYLAKE-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmuludq:
+; SKX: # BB#0:
+; SKX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmuludq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_por:
+; SKX: # BB#0:
+; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_por:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psadbw:
+; SKX: # BB#0:
+; SKX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SKX-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_psadbw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pshufd:
+; SKX: # BB#0:
+; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
+; SKX-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [1:1.00]
+; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pshufd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00]
; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pshufhw:
+; SKX: # BB#0:
+; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
+; SKX-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [1:1.00]
+; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pshufhw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00]
; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pshuflw:
+; SKX: # BB#0:
+; SKX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
+; SKX-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [1:1.00]
+; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pshuflw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00]
; SKYLAKE-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pslld:
+; SKX: # BB#0:
+; SKX-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; SKX-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pslld:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pslldq:
+; SKX: # BB#0:
+; SKX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pslldq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
; SKYLAKE-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psllq:
+; SKX: # BB#0:
+; SKX-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; SKX-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_psllq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psllw:
+; SKX: # BB#0:
+; SKX-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; SKX-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_psllw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psrad:
+; SKX: # BB#0:
+; SKX-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; SKX-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_psrad:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psraw:
+; SKX: # BB#0:
+; SKX-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; SKX-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_psraw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psrld:
+; SKX: # BB#0:
+; SKX-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; SKX-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_psrld:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psrldq:
+; SKX: # BB#0:
+; SKX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_psrldq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
; SKYLAKE-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psrlq:
+; SKX: # BB#0:
+; SKX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; SKX-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_psrlq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psrlw:
+; SKX: # BB#0:
+; SKX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; SKX-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_psrlw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psubb:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_psubb:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psubd:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_psubd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psubq:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_psubq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psubsb:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_psubsb:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psubsw:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_psubsw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psubusb:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_psubusb:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psubusw:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_psubusw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psubw:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_psubw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_punpckhbw:
+; SKX: # BB#0:
+; SKX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
+; SKX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_punpckhbw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_punpckhdq:
+; SKX: # BB#0:
+; SKX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; SKX-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [1:1.00]
+; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_punpckhdq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_punpckhqdq:
+; SKX: # BB#0:
+; SKX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; SKX-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00]
+; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_punpckhqdq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_punpckhwd:
+; SKX: # BB#0:
+; SKX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; SKX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_punpckhwd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_punpcklbw:
+; SKX: # BB#0:
+; SKX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; SKX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_punpcklbw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_punpckldq:
+; SKX: # BB#0:
+; SKX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; SKX-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [1:1.00]
+; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_punpckldq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_punpcklqdq:
+; SKX: # BB#0:
+; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
+; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_punpcklqdq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_punpcklwd:
+; SKX: # BB#0:
+; SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_punpcklwd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pxor:
+; SKX: # BB#0:
+; SKX-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pxor:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_shufpd:
+; SKX: # BB#0:
+; SKX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
+; SKX-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [1:1.00]
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_shufpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_sqrtpd:
+; SKX: # BB#0:
+; SKX-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [18:1.00]
+; SKX-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [18:1.00]
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_sqrtpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [26:21.00]
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_sqrtsd:
+; SKX: # BB#0:
+; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:1.00]
+; SKX-NEXT: vmovapd (%rdi), %xmm1 # sched: [1:0.50]
+; SKX-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [18:1.00]
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_sqrtsd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovapd (%rdi), %xmm1 # sched: [5:1.00]
; SKYLAKE-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_subpd:
+; SKX: # BB#0:
+; SKX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_subpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_subsd:
+; SKX: # BB#0:
+; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_subsd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_ucomisd:
+; SKX: # BB#0:
+; SKX-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00]
+; SKX-NEXT: setnp %al # sched: [1:1.00]
+; SKX-NEXT: sete %cl # sched: [1:1.00]
+; SKX-NEXT: andb %al, %cl # sched: [1:0.25]
+; SKX-NEXT: vucomisd (%rdi), %xmm0 # sched: [8:1.00]
+; SKX-NEXT: setnp %al # sched: [1:1.00]
+; SKX-NEXT: sete %dl # sched: [1:1.00]
+; SKX-NEXT: andb %al, %dl # sched: [1:0.25]
+; SKX-NEXT: orb %cl, %dl # sched: [1:0.25]
+; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_ucomisd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_unpckhpd:
+; SKX: # BB#0:
+; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; SKX-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00]
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_unpckhpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_unpcklpd:
+; SKX: # BB#0:
+; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SKX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [1:1.00]
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_unpcklpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_xorpd:
+; SKX: # BB#0:
+; SKX-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_xorpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; SKYLAKE-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_addsubpd:
+; SKX: # BB#0:
+; SKX-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_addsubpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_addsubps:
+; SKX: # BB#0:
+; SKX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_addsubps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_haddpd:
+; SKX: # BB#0:
+; SKX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
+; SKX-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_haddpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_haddps:
+; SKX: # BB#0:
+; SKX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
+; SKX-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_haddps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_hsubpd:
+; SKX: # BB#0:
+; SKX-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
+; SKX-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_hsubpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_hsubps:
+; SKX: # BB#0:
+; SKX-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
+; SKX-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_hsubps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vlddqu (%rdi), %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_lddqu:
+; SKX: # BB#0:
+; SKX-NEXT: vlddqu (%rdi), %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_lddqu:
; BTVER2: # BB#0:
; BTVER2-NEXT: vlddqu (%rdi), %xmm0 # sched: [5:1.00]
; SKYLAKE-NEXT: monitor # sched: [100:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_monitor:
+; SKX: # BB#0:
+; SKX-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
+; SKX-NEXT: movl %esi, %ecx # sched: [1:0.25]
+; SKX-NEXT: monitor # sched: [100:0.25]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_monitor:
; BTVER2: # BB#0:
; BTVER2-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movddup:
+; SKX: # BB#0:
+; SKX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
+; SKX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [1:0.50]
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movddup:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:1.00]
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movshdup:
+; SKX: # BB#0:
+; SKX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
+; SKX-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [1:0.50]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movshdup:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [5:1.00]
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movsldup:
+; SKX: # BB#0:
+; SKX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
+; SKX-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [1:0.50]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movsldup:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [5:1.00]
; SKYLAKE-NEXT: mwait # sched: [20:2.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_mwait:
+; SKX: # BB#0:
+; SKX-NEXT: movl %edi, %ecx # sched: [1:0.25]
+; SKX-NEXT: movl %esi, %eax # sched: [1:0.25]
+; SKX-NEXT: mwait # sched: [20:2.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_mwait:
; BTVER2: # BB#0:
; BTVER2-NEXT: movl %edi, %ecx # sched: [1:0.17]
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; SKYLAKE-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_blendpd:
+; SKX: # BB#0:
+; SKX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:1.00]
+; SKX-NEXT: vmovapd (%rdi), %xmm2 # sched: [1:0.50]
+; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm2[1] sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_blendpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_blendps:
+; SKX: # BB#0:
+; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
+; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_blendps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
; SKYLAKE-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:0.67]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_blendvpd:
+; SKX: # BB#0:
+; SKX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
+; SKX-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:0.67]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_blendvpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SKYLAKE-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:0.67]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_blendvps:
+; SKX: # BB#0:
+; SKX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
+; SKX-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:0.67]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_blendvps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SKYLAKE-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_dppd:
+; SKX: # BB#0:
+; SKX-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
+; SKX-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_dppd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [13:1.33]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_dpps:
+; SKX: # BB#0:
+; SKX-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [13:1.33]
+; SKX-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [13:1.33]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_dpps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_insertps:
+; SKX: # BB#0:
+; SKX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
+; SKX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_insertps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50]
; SKYLAKE-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_movntdqa:
+; SKX: # BB#0:
+; SKX-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_movntdqa:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [5:1.00]
; SKYLAKE-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [4:2.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_mpsadbw:
+; SKX: # BB#0:
+; SKX-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
+; SKX-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [4:2.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_mpsadbw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; SKYLAKE-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_packusdw:
+; SKX: # BB#0:
+; SKX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_packusdw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:0.67]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pblendvb:
+; SKX: # BB#0:
+; SKX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
+; SKX-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:0.67]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pblendvb:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SKYLAKE-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pblendw:
+; SKX: # BB#0:
+; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
+; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pblendw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
; SKYLAKE-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pcmpeqq:
+; SKX: # BB#0:
+; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
+; SKX-NEXT: vpmovm2q %k0, %xmm0
+; SKX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
+; SKX-NEXT: vpmovm2q %k0, %xmm0
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pcmpeqq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pextrb:
+; SKX: # BB#0:
+; SKX-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00]
+; SKX-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pextrb:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpextrb $3, %xmm0, %eax # sched: [1:0.50]
; SKYLAKE-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pextrd:
+; SKX: # BB#0:
+; SKX-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00]
+; SKX-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pextrd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpextrd $3, %xmm0, %eax # sched: [1:0.50]
; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pextrq:
+; SKX: # BB#0:
+; SKX-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00]
+; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pextrq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpextrq $1, %xmm0, %rax # sched: [1:0.50]
; SKYLAKE-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pextrw:
+; SKX: # BB#0:
+; SKX-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00]
+; SKX-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pextrw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpextrw $3, %xmm0, %eax # sched: [1:0.50]
; SKYLAKE-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:0.33]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_phminposuw:
+; SKX: # BB#0:
+; SKX-NEXT: vphminposuw (%rdi), %xmm0 # sched: [4:0.50]
+; SKX-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_phminposuw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vphminposuw (%rdi), %xmm0 # sched: [7:1.00]
; SKYLAKE-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pinsrb:
+; SKX: # BB#0:
+; SKX-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
+; SKX-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pinsrb:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pinsrd:
+; SKX: # BB#0:
+; SKX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
+; SKX-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pinsrd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pinsrq:
+; SKX: # BB#0:
+; SKX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
+; SKX-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [1:1.00]
+; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pinsrq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00]
; SKYLAKE-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmaxsb:
+; SKX: # BB#0:
+; SKX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmaxsb:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmaxsd:
+; SKX: # BB#0:
+; SKX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmaxsd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmaxud:
+; SKX: # BB#0:
+; SKX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmaxud:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmaxuw:
+; SKX: # BB#0:
+; SKX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmaxuw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pminsb:
+; SKX: # BB#0:
+; SKX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pminsb:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pminsd:
+; SKX: # BB#0:
+; SKX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pminsd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pminud:
+; SKX: # BB#0:
+; SKX-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pminud:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pminuw:
+; SKX: # BB#0:
+; SKX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pminuw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmovsxbw:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [1:1.00]
+; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmovsxbw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00]
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmovsxbd:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [1:1.00]
+; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmovsxbd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00]
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmovsxbq:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [1:1.00]
+; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmovsxbq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00]
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmovsxdq:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [1:1.00]
+; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmovsxdq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00]
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmovsxwd:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [1:1.00]
+; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmovsxwd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00]
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmovsxwq:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [1:1.00]
+; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmovsxwq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00]
; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmovzxbw:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
+; SKX-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [1:1.00]
+; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmovzxbw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmovzxbd:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
+; SKX-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [1:1.00]
+; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmovzxbd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmovzxbq:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
+; SKX-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
+; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmovzxbq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmovzxdq:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
+; SKX-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [1:1.00]
+; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmovzxdq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00]
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmovzxwd:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
+; SKX-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [1:1.00]
+; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmovzxwd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmovzxwq:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
+; SKX-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [1:1.00]
+; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmovzxwq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
; SKYLAKE-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmuldq:
+; SKX: # BB#0:
+; SKX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmuldq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SKYLAKE-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmulld:
+; SKX: # BB#0:
+; SKX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [8:0.67]
+; SKX-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmulld:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SKYLAKE-NEXT: movzbl %cl, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_ptest:
+; SKX: # BB#0:
+; SKX-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00]
+; SKX-NEXT: setb %al # sched: [1:1.00]
+; SKX-NEXT: vptest (%rdi), %xmm0 # sched: [3:1.00]
+; SKX-NEXT: setb %cl # sched: [1:1.00]
+; SKX-NEXT: andb %al, %cl # sched: [1:0.25]
+; SKX-NEXT: movzbl %cl, %eax # sched: [1:0.25]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_ptest:
; BTVER2: # BB#0:
; BTVER2-NEXT: vptest %xmm1, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_roundpd:
+; SKX: # BB#0:
+; SKX-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:0.67]
+; SKX-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [8:0.67]
+; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_roundpd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [8:1.00]
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_roundps:
+; SKX: # BB#0:
+; SKX-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:0.67]
+; SKX-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [8:0.67]
+; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_roundps:
; BTVER2: # BB#0:
; BTVER2-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [8:1.00]
; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_roundsd:
+; SKX: # BB#0:
+; SKX-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67]
+; SKX-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
+; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_roundsd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_roundss:
+; SKX: # BB#0:
+; SKX-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67]
+; SKX-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
+; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_roundss:
; BTVER2: # BB#0:
; BTVER2-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: crc32_32_8:
+; SKX: # BB#0:
+; SKX-NEXT: crc32b %sil, %edi # sched: [3:1.00]
+; SKX-NEXT: crc32b (%rdx), %edi # sched: [8:1.00]
+; SKX-NEXT: movl %edi, %eax # sched: [1:0.25]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: crc32_32_8:
; BTVER2: # BB#0:
; BTVER2-NEXT: crc32b %sil, %edi # sched: [3:1.00]
; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: crc32_32_16:
+; SKX: # BB#0:
+; SKX-NEXT: crc32w %si, %edi # sched: [3:1.00]
+; SKX-NEXT: crc32w (%rdx), %edi # sched: [8:1.00]
+; SKX-NEXT: movl %edi, %eax # sched: [1:0.25]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: crc32_32_16:
; BTVER2: # BB#0:
; BTVER2-NEXT: crc32w %si, %edi # sched: [3:1.00]
; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: crc32_32_32:
+; SKX: # BB#0:
+; SKX-NEXT: crc32l %esi, %edi # sched: [3:1.00]
+; SKX-NEXT: crc32l (%rdx), %edi # sched: [8:1.00]
+; SKX-NEXT: movl %edi, %eax # sched: [1:0.25]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: crc32_32_32:
; BTVER2: # BB#0:
; BTVER2-NEXT: crc32l %esi, %edi # sched: [3:1.00]
; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: crc32_64_8:
+; SKX: # BB#0:
+; SKX-NEXT: crc32b %sil, %edi # sched: [3:1.00]
+; SKX-NEXT: crc32b (%rdx), %edi # sched: [8:1.00]
+; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: crc32_64_8:
; BTVER2: # BB#0:
; BTVER2-NEXT: crc32b %sil, %edi # sched: [3:1.00]
; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: crc32_64_64:
+; SKX: # BB#0:
+; SKX-NEXT: crc32q %rsi, %rdi # sched: [3:1.00]
+; SKX-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00]
+; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: crc32_64_64:
; BTVER2: # BB#0:
; BTVER2-NEXT: crc32q %rsi, %rdi # sched: [3:1.00]
; SKYLAKE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pcmpestri:
+; SKX: # BB#0:
+; SKX-NEXT: movl $7, %eax # sched: [1:0.25]
+; SKX-NEXT: movl $7, %edx # sched: [1:0.25]
+; SKX-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00]
+; SKX-NEXT: movl %ecx, %esi # sched: [1:0.25]
+; SKX-NEXT: movl $7, %eax # sched: [1:0.25]
+; SKX-NEXT: movl $7, %edx # sched: [1:0.25]
+; SKX-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [18:4.00]
+; SKX-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
+; SKX-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pcmpestri:
; BTVER2: # BB#0:
; BTVER2-NEXT: movl $7, %eax # sched: [1:0.17]
; SKYLAKE-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [19:4.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pcmpestrm:
+; SKX: # BB#0:
+; SKX-NEXT: movl $7, %eax # sched: [1:0.25]
+; SKX-NEXT: movl $7, %edx # sched: [1:0.25]
+; SKX-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00]
+; SKX-NEXT: movl $7, %eax # sched: [1:0.25]
+; SKX-NEXT: movl $7, %edx # sched: [1:0.25]
+; SKX-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [19:4.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pcmpestrm:
; BTVER2: # BB#0:
; BTVER2-NEXT: movl $7, %eax # sched: [1:0.17]
; SKYLAKE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pcmpistri:
+; SKX: # BB#0:
+; SKX-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [10:3.00]
+; SKX-NEXT: movl %ecx, %eax # sched: [1:0.25]
+; SKX-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [10:3.00]
+; SKX-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
+; SKX-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pcmpistri:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [6:1.00]
; SKYLAKE-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [10:3.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pcmpistrm:
+; SKX: # BB#0:
+; SKX-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00]
+; SKX-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [10:3.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pcmpistrm:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [7:1.00]
; SKYLAKE-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pcmpgtq:
+; SKX: # BB#0:
+; SKX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
+; SKX-NEXT: vpmovm2q %k0, %xmm0
+; SKX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
+; SKX-NEXT: vpmovm2q %k0, %xmm0
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pcmpgtq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pclmulqdq:
+; SKX: # BB#0:
+; SKX-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [6:1.00]
+; SKX-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pclmulqdq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pabsb:
+; SKX: # BB#0:
+; SKX-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpabsb (%rdi), %xmm1 # sched: [1:0.50]
+; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pabsb:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpabsb (%rdi), %xmm1 # sched: [6:1.00]
; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pabsd:
+; SKX: # BB#0:
+; SKX-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpabsd (%rdi), %xmm1 # sched: [1:0.50]
+; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pabsd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpabsd (%rdi), %xmm1 # sched: [6:1.00]
; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pabsw:
+; SKX: # BB#0:
+; SKX-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpabsw (%rdi), %xmm1 # sched: [1:0.50]
+; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pabsw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpabsw (%rdi), %xmm1 # sched: [6:1.00]
; SKYLAKE-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_palignr:
+; SKX: # BB#0:
+; SKX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
+; SKX-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_palignr:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
; SKYLAKE-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_phaddd:
+; SKX: # BB#0:
+; SKX-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
+; SKX-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_phaddd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_phaddsw:
+; SKX: # BB#0:
+; SKX-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
+; SKX-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_phaddsw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_phaddw:
+; SKX: # BB#0:
+; SKX-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
+; SKX-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_phaddw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_phsubd:
+; SKX: # BB#0:
+; SKX-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
+; SKX-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_phsubd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_phsubsw:
+; SKX: # BB#0:
+; SKX-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
+; SKX-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_phsubsw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_phsubw:
+; SKX: # BB#0:
+; SKX-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
+; SKX-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_phsubw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmaddubsw:
+; SKX: # BB#0:
+; SKX-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmaddubsw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SKYLAKE-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pmulhrsw:
+; SKX: # BB#0:
+; SKX-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [4:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pmulhrsw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SKYLAKE-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_pshufb:
+; SKX: # BB#0:
+; SKX-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_pshufb:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psignb:
+; SKX: # BB#0:
+; SKX-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_psignb:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psignd:
+; SKX: # BB#0:
+; SKX-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_psignd:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
+; SKX-LABEL: test_psignw:
+; SKX: # BB#0:
+; SKX-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SKX-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: retq # sched: [2:1.00]
+;
; BTVER2-LABEL: test_psignw:
; BTVER2: # BB#0:
; BTVER2-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]