define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
; GENERIC-LABEL: addpd512:
; GENERIC: # BB#0: # %entry
-; GENERIC-NEXT: vaddpd %zmm0, %zmm1, %zmm0
+; GENERIC-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: addpd512:
define <8 x double> @addpd512fold(<8 x double> %y) {
; GENERIC-LABEL: addpd512fold:
; GENERIC: # BB#0: # %entry
-; GENERIC-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0
+; GENERIC-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: addpd512fold:
define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
; GENERIC-LABEL: addps512:
; GENERIC: # BB#0: # %entry
-; GENERIC-NEXT: vaddps %zmm0, %zmm1, %zmm0
+; GENERIC-NEXT: vaddps %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: addps512:
define <16 x float> @addps512fold(<16 x float> %y) {
; GENERIC-LABEL: addps512fold:
; GENERIC: # BB#0: # %entry
-; GENERIC-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0
+; GENERIC-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: addps512fold:
define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
; GENERIC-LABEL: subpd512:
; GENERIC: # BB#0: # %entry
-; GENERIC-NEXT: vsubpd %zmm0, %zmm1, %zmm0
+; GENERIC-NEXT: vsubpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: subpd512:
define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
; GENERIC-LABEL: subpd512fold:
; GENERIC: # BB#0: # %entry
-; GENERIC-NEXT: vsubpd (%rdi), %zmm0, %zmm0
+; GENERIC-NEXT: vsubpd (%rdi), %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: subpd512fold:
define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
; GENERIC-LABEL: subps512:
; GENERIC: # BB#0: # %entry
-; GENERIC-NEXT: vsubps %zmm0, %zmm1, %zmm0
+; GENERIC-NEXT: vsubps %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: subps512:
define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
; GENERIC-LABEL: subps512fold:
; GENERIC: # BB#0: # %entry
-; GENERIC-NEXT: vsubps (%rdi), %zmm0, %zmm0
+; GENERIC-NEXT: vsubps (%rdi), %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: subps512fold:
define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
; GENERIC-LABEL: imulq512:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vpmullq %zmm0, %zmm1, %zmm0
+; GENERIC-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: imulq512:
define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) {
; GENERIC-LABEL: imulq256:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vpmullq %ymm0, %ymm1, %ymm0
+; GENERIC-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: imulq256:
define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) {
; GENERIC-LABEL: imulq128:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vpmullq %xmm0, %xmm1, %xmm0
+; GENERIC-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: imulq128:
define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
; GENERIC-LABEL: mulpd512:
; GENERIC: # BB#0: # %entry
-; GENERIC-NEXT: vmulpd %zmm0, %zmm1, %zmm0
+; GENERIC-NEXT: vmulpd %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: mulpd512:
define <8 x double> @mulpd512fold(<8 x double> %y) {
; GENERIC-LABEL: mulpd512fold:
; GENERIC: # BB#0: # %entry
-; GENERIC-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0
+; GENERIC-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [9:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: mulpd512fold:
define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
; GENERIC-LABEL: mulps512:
; GENERIC: # BB#0: # %entry
-; GENERIC-NEXT: vmulps %zmm0, %zmm1, %zmm0
+; GENERIC-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: mulps512:
define <16 x float> @mulps512fold(<16 x float> %y) {
; GENERIC-LABEL: mulps512fold:
; GENERIC: # BB#0: # %entry
-; GENERIC-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0
+; GENERIC-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [9:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: mulps512fold:
define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
; GENERIC-LABEL: divpd512:
; GENERIC: # BB#0: # %entry
-; GENERIC-NEXT: vdivpd %zmm0, %zmm1, %zmm0
+; GENERIC-NEXT: vdivpd %zmm0, %zmm1, %zmm0 # sched: [24:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: divpd512:
define <8 x double> @divpd512fold(<8 x double> %y) {
; GENERIC-LABEL: divpd512fold:
; GENERIC: # BB#0: # %entry
-; GENERIC-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0
+; GENERIC-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [28:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: divpd512fold:
define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
; GENERIC-LABEL: divps512:
; GENERIC: # BB#0: # %entry
-; GENERIC-NEXT: vdivps %zmm0, %zmm1, %zmm0
+; GENERIC-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [24:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: divps512:
define <16 x float> @divps512fold(<16 x float> %y) {
; GENERIC-LABEL: divps512fold:
; GENERIC: # BB#0: # %entry
-; GENERIC-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0
+; GENERIC-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [28:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: divps512fold:
define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; GENERIC-LABEL: vpaddq_test:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; GENERIC-NEXT: vpaddq %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddq_test:
define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
; GENERIC-LABEL: vpaddq_fold_test:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vpaddq (%rdi), %zmm0, %zmm0
+; GENERIC-NEXT: vpaddq (%rdi), %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddq_fold_test:
define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
; GENERIC-LABEL: vpaddq_broadcast_test:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddq_broadcast_test:
define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
; GENERIC-LABEL: vpaddq_broadcast2_test:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0
+; GENERIC-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddq_broadcast2_test:
define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; GENERIC-LABEL: vpaddd_test:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_test:
define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
; GENERIC-LABEL: vpaddd_fold_test:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0
+; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_fold_test:
define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
; GENERIC-LABEL: vpaddd_broadcast_test:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_broadcast_test:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1}
+; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_mask_test:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_maskz_test:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1}
+; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_mask_fold_test:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
+; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_mask_broadcast_test:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
+; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_maskz_fold_test:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
+; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_maskz_broadcast_test:
define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; GENERIC-LABEL: vpsubq_test:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vpsubq %zmm1, %zmm0, %zmm0
+; GENERIC-NEXT: vpsubq %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpsubq_test:
define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; GENERIC-LABEL: vpsubd_test:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vpsubd %zmm1, %zmm0, %zmm0
+; GENERIC-NEXT: vpsubd %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpsubd_test:
define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
; GENERIC-LABEL: vpmulld_test:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vpmulld %zmm1, %zmm0, %zmm0
+; GENERIC-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpmulld_test:
define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
; GENERIC-LABEL: fadd_broadcast:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; GENERIC-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: fadd_broadcast:
define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
; GENERIC-LABEL: addq_broadcast:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: addq_broadcast:
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; GENERIC-LABEL: orq_broadcast:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; GENERIC-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: orq_broadcast:
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; GENERIC-LABEL: andd512fold:
; GENERIC: # BB#0: # %entry
-; GENERIC-NEXT: vandps (%rdi), %zmm0, %zmm0
+; GENERIC-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: andd512fold:
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
; GENERIC-LABEL: andqbrst:
; GENERIC: # BB#0: # %entry
-; GENERIC-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0
+; GENERIC-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: andqbrst:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1}
+; GENERIC-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mask_vaddps:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1}
+; GENERIC-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mask_vmulps:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1}
+; GENERIC-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mask_vminps:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
+; GENERIC-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mask_vminpd:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1}
+; GENERIC-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mask_vmaxps:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
+; GENERIC-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mask_vmaxpd:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1}
+; GENERIC-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mask_vsubps:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1}
+; GENERIC-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [24:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mask_vdivps:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqq %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1}
+; GENERIC-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mask_vaddpd:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; GENERIC-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_maskz_vaddpd:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1}
+; GENERIC-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mask_fold_vaddpd:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqq %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z}
+; GENERIC-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_maskz_fold_vaddpd:
define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
; GENERIC-LABEL: test_broadcast_vaddpd:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0
+; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_broadcast_vaddpd:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqq %zmm0, %zmm2, %k1
-; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1}
+; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [7:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqq %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
+; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_maskz_broadcast_vaddpd:
define <16 x float> @test_fxor(<16 x float> %a) {
; GENERIC-LABEL: test_fxor:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_fxor:
define <8 x float> @test_fxor_8f32(<8 x float> %a) {
; GENERIC-LABEL: test_fxor_8f32:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0
+; GENERIC-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_fxor_8f32:
define <8 x double> @fabs_v8f64(<8 x double> %p)
; GENERIC-LABEL: fabs_v8f64:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; GENERIC-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: fabs_v8f64:
define <16 x float> @fabs_v16f32(<16 x float> %p)
; GENERIC-LABEL: fabs_v16f32:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; GENERIC-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: fabs_v16f32:
define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; GENERIC-LABEL: test_x86_fmadd_ps_z:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0
-; GENERIC-NEXT: vaddps %zmm2, %zmm0, %zmm0
+; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
+; GENERIC-NEXT: vaddps %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_x86_fmadd_ps_z:
define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; GENERIC-LABEL: test_x86_fmsub_ps_z:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0
-; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0
+; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
+; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_x86_fmsub_ps_z:
define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; GENERIC-LABEL: test_x86_fnmadd_ps_z:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0
-; GENERIC-NEXT: vsubps %zmm0, %zmm2, %zmm0
+; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
+; GENERIC-NEXT: vsubps %zmm0, %zmm2, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_x86_fnmadd_ps_z:
define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; GENERIC-LABEL: test_x86_fnmsub_ps_z:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0
-; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0
+; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
+; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_x86_fnmsub_ps_z:
define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; GENERIC-LABEL: test_x86_fmadd_pd_z:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vmulpd %zmm1, %zmm0, %zmm0
-; GENERIC-NEXT: vaddpd %zmm2, %zmm0, %zmm0
+; GENERIC-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
+; GENERIC-NEXT: vaddpd %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_x86_fmadd_pd_z:
define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; GENERIC-LABEL: test_x86_fmsub_pd_z:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vmulpd %zmm1, %zmm0, %zmm0
-; GENERIC-NEXT: vsubpd %zmm2, %zmm0, %zmm0
+; GENERIC-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
+; GENERIC-NEXT: vsubpd %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_x86_fmsub_pd_z:
define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
; GENERIC-LABEL: test231_br:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; GENERIC-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; GENERIC-NEXT: vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [9:1.00]
+; GENERIC-NEXT: vaddps %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test231_br:
define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
; GENERIC-LABEL: test213_br:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0
-; GENERIC-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
+; GENERIC-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test213_br:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %xmm2, %k1
-; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm2
-; GENERIC-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1}
+; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm2 # sched: [9:1.00]
+; GENERIC-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_x86_fmadd132_ps:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %xmm2, %k1
-; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm0
-; GENERIC-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1}
+; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm0 # sched: [9:1.00]
+; GENERIC-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %xmm2, %k1
-; GENERIC-NEXT: vmulps %zmm0, %zmm1, %zmm0
-; GENERIC-NEXT: vaddps (%rdi), %zmm0, %zmm1 {%k1}
+; GENERIC-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
+; GENERIC-NEXT: vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [7:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
; GENERIC-LABEL: vpandd:
; GENERIC: # BB#0: # %entry
-; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <16 x i32> @vpandnd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
; GENERIC-LABEL: vpandnd:
; GENERIC: # BB#0: # %entry
-; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
; GENERIC-LABEL: vpord:
; GENERIC: # BB#0: # %entry
-; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
; GENERIC-LABEL: vpxord:
; GENERIC: # BB#0: # %entry
-; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <8 x i64> @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
; GENERIC-LABEL: vpandq:
; GENERIC: # BB#0: # %entry
-; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <8 x i64> @vpandnq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
; GENERIC-LABEL: vpandnq:
; GENERIC: # BB#0: # %entry
-; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
; GENERIC-LABEL: vporq:
; GENERIC: # BB#0: # %entry
-; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
; GENERIC-LABEL: vpxorq:
; GENERIC: # BB#0: # %entry
-; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <64 x i8> @and_v64i8(<64 x i8> %a, <64 x i8> %b) {
; GENERIC-LABEL: and_v64i8:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm0
+; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: and_v64i8:
define <64 x i8> @andn_v64i8(<64 x i8> %a, <64 x i8> %b) {
; GENERIC-LABEL: andn_v64i8:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vandnps %zmm0, %zmm1, %zmm0
+; GENERIC-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: andn_v64i8:
define <64 x i8> @or_v64i8(<64 x i8> %a, <64 x i8> %b) {
; GENERIC-LABEL: or_v64i8:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm0
+; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: or_v64i8:
define <64 x i8> @xor_v64i8(<64 x i8> %a, <64 x i8> %b) {
; GENERIC-LABEL: xor_v64i8:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0
+; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: xor_v64i8:
define <32 x i16> @and_v32i16(<32 x i16> %a, <32 x i16> %b) {
; GENERIC-LABEL: and_v32i16:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm0
+; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: and_v32i16:
define <32 x i16> @andn_v32i16(<32 x i16> %a, <32 x i16> %b) {
; GENERIC-LABEL: andn_v32i16:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vandnps %zmm0, %zmm1, %zmm0
+; GENERIC-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: andn_v32i16:
define <32 x i16> @or_v32i16(<32 x i16> %a, <32 x i16> %b) {
; GENERIC-LABEL: or_v32i16:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm0
+; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: or_v32i16:
define <32 x i16> @xor_v32i16(<32 x i16> %a, <32 x i16> %b) {
; GENERIC-LABEL: xor_v32i16:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0
+; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: xor_v32i16:
; GENERIC-LABEL: masked_and_v16f32:
; GENERIC: # BB#0:
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1}
-; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0
+; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: masked_and_v16f32:
; GENERIC-LABEL: masked_or_v16f32:
; GENERIC: # BB#0:
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1}
-; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0
+; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: masked_or_v16f32:
; GENERIC-LABEL: masked_xor_v16f32:
; GENERIC: # BB#0:
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1}
-; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0
+; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: masked_xor_v16f32:
; GENERIC-LABEL: masked_and_v8f64:
; GENERIC: # BB#0:
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1}
-; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0
+; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: masked_and_v8f64:
; GENERIC-LABEL: masked_or_v8f64:
; GENERIC: # BB#0:
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1}
-; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0
+; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: masked_or_v8f64:
; GENERIC-LABEL: masked_xor_v8f64:
; GENERIC: # BB#0:
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1}
-; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0
+; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: masked_xor_v8f64:
; GENERIC-LABEL: test_mm512_mask_and_epi32:
; GENERIC: # BB#0: # %entry
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1}
+; GENERIC-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mm512_mask_and_epi32:
; GENERIC-LABEL: test_mm512_mask_or_epi32:
; GENERIC: # BB#0: # %entry
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1}
+; GENERIC-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mm512_mask_or_epi32:
; GENERIC-LABEL: test_mm512_mask_xor_epi32:
; GENERIC: # BB#0: # %entry
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1}
+; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mm512_mask_xor_epi32:
; GENERIC-LABEL: test_mm512_mask_xor_pd:
; GENERIC: # BB#0: # %entry
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1}
+; GENERIC-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mm512_mask_xor_pd:
; GENERIC-LABEL: test_mm512_maskz_xor_pd:
; GENERIC: # BB#0: # %entry
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; GENERIC-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mm512_maskz_xor_pd:
; GENERIC-LABEL: test_mm512_mask_xor_ps:
; GENERIC: # BB#0: # %entry
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1}
+; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mm512_mask_xor_ps:
; GENERIC-LABEL: test_mm512_maskz_xor_ps:
; GENERIC: # BB#0: # %entry
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z}
+; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mm512_maskz_xor_ps:
; GENERIC-LABEL: test_mm512_mask_or_pd:
; GENERIC: # BB#0: # %entry
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1}
+; GENERIC-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mm512_mask_or_pd:
; GENERIC-LABEL: test_mm512_maskz_or_pd:
; GENERIC: # BB#0: # %entry
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z}
+; GENERIC-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mm512_maskz_or_pd:
; GENERIC-LABEL: test_mm512_mask_or_ps:
; GENERIC: # BB#0: # %entry
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1}
+; GENERIC-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mm512_mask_or_ps:
; GENERIC-LABEL: test_mm512_maskz_or_ps:
; GENERIC: # BB#0: # %entry
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z}
+; GENERIC-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mm512_maskz_or_ps:
; GENERIC-LABEL: test_mm512_mask_and_pd:
; GENERIC: # BB#0: # %entry
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1}
+; GENERIC-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mm512_mask_and_pd:
; GENERIC-LABEL: test_mm512_maskz_and_pd:
; GENERIC: # BB#0: # %entry
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z}
+; GENERIC-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mm512_maskz_and_pd:
; GENERIC-LABEL: test_mm512_mask_and_ps:
; GENERIC: # BB#0: # %entry
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1}
+; GENERIC-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mm512_mask_and_ps:
; GENERIC-LABEL: test_mm512_maskz_and_ps:
; GENERIC: # BB#0: # %entry
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z}
+; GENERIC-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mm512_maskz_and_ps:
; GENERIC-LABEL: test_mm512_mask_andnot_pd:
; GENERIC: # BB#0: # %entry
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1}
+; GENERIC-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mm512_mask_andnot_pd:
; GENERIC-LABEL: test_mm512_maskz_andnot_pd:
; GENERIC: # BB#0: # %entry
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; GENERIC-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mm512_maskz_andnot_pd:
; GENERIC-LABEL: test_mm512_mask_andnot_ps:
; GENERIC: # BB#0: # %entry
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1}
+; GENERIC-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mm512_mask_andnot_ps:
; GENERIC-LABEL: test_mm512_maskz_andnot_ps:
; GENERIC: # BB#0: # %entry
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z}
+; GENERIC-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mm512_maskz_andnot_ps:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm4, %xmm3, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [3:1.00]
; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm3, %xmm2, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm4, %xmm3, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [3:1.00]
; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm3, %xmm2, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mask1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm4, %xmm3, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [3:1.00]
; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm3, %xmm2, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mask2:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm4, %xmm3, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [3:1.00]
; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm3, %xmm2, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mask3:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm3, %xmm2, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1]
+; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm2, %xmm1, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1]
+; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm3, %xmm2, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1]
+; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm2, %xmm1, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1]
+; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm3, %xmm2, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1]
+; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm2, %xmm1, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1]
+; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm3, %xmm2, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1]
+; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm2, %xmm1, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1]
+; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [3:1.00]
; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [3:1.00]
; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mask1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [3:1.00]
; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mask2:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [3:1.00]
; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mask3:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3:
define <16 x float> @test_16xfloat_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2) {
; GENERIC-LABEL: test_16xfloat_unpack_low_mask0:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_unpack_low_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [3:1.00]
; GENERIC-NEXT: vmovaps %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [3:1.00]
; GENERIC-NEXT: vmovaps %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mask1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [3:1.00]
; GENERIC-NEXT: vmovaps %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mask2:
define <16 x float> @test_16xfloat_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2) {
; GENERIC-LABEL: test_16xfloat_unpack_low_mask3:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_unpack_low_mask3:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [3:1.00]
; GENERIC-NEXT: vmovaps %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mask3:
define <16 x float> @test_16xfloat_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) {
; GENERIC-LABEL: test_16xfloat_unpack_low_mem_mask0:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_unpack_low_mem_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2:
define <16 x float> @test_16xfloat_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) {
; GENERIC-LABEL: test_16xfloat_unpack_low_mem_mask3:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_unpack_low_mem_mask3:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %xmm4, %xmm3, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [3:1.00]
; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %xmm3, %xmm2, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %xmm4, %xmm3, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [3:1.00]
; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %xmm3, %xmm2, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mask1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %xmm3, %xmm2, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00]
; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %xmm2, %xmm1, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %xmm3, %xmm2, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00]
; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %xmm2, %xmm1, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [3:1.00]
; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [3:1.00]
; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mask1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [3:1.00]
; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mask2:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [3:1.00]
; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mask3:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3:
define <8 x double> @test_8xdouble_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2) {
; GENERIC-LABEL: test_8xdouble_unpack_low_mask0:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_unpack_low_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [3:1.00]
; GENERIC-NEXT: vmovapd %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [3:1.00]
; GENERIC-NEXT: vmovapd %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mask1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [3:1.00]
; GENERIC-NEXT: vmovapd %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mask2:
define <8 x double> @test_8xdouble_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2) {
; GENERIC-LABEL: test_8xdouble_unpack_low_mask3:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_unpack_low_mask3:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [3:1.00]
; GENERIC-NEXT: vmovapd %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mask3:
define <8 x double> @test_8xdouble_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) {
; GENERIC-LABEL: test_8xdouble_unpack_low_mem_mask0:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_unpack_low_mem_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2:
define <8 x double> @test_8xdouble_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) {
; GENERIC-LABEL: test_8xdouble_unpack_low_mem_mask3:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_unpack_low_mem_mask3:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm4, %xmm3, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [3:1.00]
; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm3, %xmm2, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm4, %xmm3, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [3:1.00]
; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm3, %xmm2, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mask1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm4, %xmm3, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [3:1.00]
; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm3, %xmm2, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mask2:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm4, %xmm3, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [3:1.00]
; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm3, %xmm2, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mask3:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm3, %xmm2, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3]
+; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm2, %xmm1, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3]
+; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm3, %xmm2, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3]
+; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm2, %xmm1, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3]
+; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm3, %xmm2, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3]
+; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm2, %xmm1, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3]
+; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm3, %xmm2, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3]
+; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm2, %xmm1, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3]
+; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [3:1.00]
; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [3:1.00]
; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mask1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [3:1.00]
; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mask2:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [3:1.00]
; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mask3:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3:
define <16 x float> @test_16xfloat_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2) {
; GENERIC-LABEL: test_16xfloat_unpack_high_mask0:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_unpack_high_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [3:1.00]
; GENERIC-NEXT: vmovaps %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [3:1.00]
; GENERIC-NEXT: vmovaps %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mask1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [3:1.00]
; GENERIC-NEXT: vmovaps %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mask2:
define <16 x float> @test_16xfloat_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2) {
; GENERIC-LABEL: test_16xfloat_unpack_high_mask3:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_unpack_high_mask3:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [3:1.00]
; GENERIC-NEXT: vmovaps %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mask3:
define <16 x float> @test_16xfloat_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) {
; GENERIC-LABEL: test_16xfloat_unpack_high_mem_mask0:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_unpack_high_mem_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2:
define <16 x float> @test_16xfloat_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) {
; GENERIC-LABEL: test_16xfloat_unpack_high_mem_mask3:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_unpack_high_mem_mask3:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %xmm4, %xmm3, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [3:1.00]
; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %xmm3, %xmm2, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %xmm4, %xmm3, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [3:1.00]
; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %xmm3, %xmm2, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mask1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %xmm3, %xmm2, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00]
; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %xmm2, %xmm1, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %xmm3, %xmm2, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00]
; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %xmm2, %xmm1, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [3:1.00]
; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [3:1.00]
; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mask1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [3:1.00]
; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mask2:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [3:1.00]
; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mask3:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3:
define <8 x double> @test_8xdouble_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2) {
; GENERIC-LABEL: test_8xdouble_unpack_high_mask0:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_unpack_high_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [3:1.00]
; GENERIC-NEXT: vmovapd %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [3:1.00]
; GENERIC-NEXT: vmovapd %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mask1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [3:1.00]
; GENERIC-NEXT: vmovapd %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mask2:
define <8 x double> @test_8xdouble_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2) {
; GENERIC-LABEL: test_8xdouble_unpack_high_mask3:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_unpack_high_mask3:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [3:1.00]
; GENERIC-NEXT: vmovapd %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mask3:
define <8 x double> @test_8xdouble_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) {
; GENERIC-LABEL: test_8xdouble_unpack_high_mem_mask0:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_unpack_high_mem_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2:
define <8 x double> @test_8xdouble_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) {
; GENERIC-LABEL: test_8xdouble_unpack_high_mem_mask3:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_unpack_high_mem_mask3:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3: