}]>;
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
- X86VectorVTInfo _> {
+ OpndItins itins, X86VectorVTInfo _> {
let isCommutable = 1 in
def rri : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
imm:$cc))],
- IIC_SSE_ALU_F32P_RR>, EVEX_4V;
+ itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
def rmi : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
!strconcat("vpcmp${cc}", Suffix,
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
imm:$cc))],
- IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+ itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
let isCommutable = 1 in
def rrik : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
[(set _.KRC:$dst, (and _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
imm:$cc)))],
- IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
+ itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
def rmik : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
AVX512ICC:$cc),
(OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
imm:$cc)))],
- IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
+ itins.rm>, EVEX_4V, EVEX_K,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
!strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
"$dst, $src1, $src2, $cc}"),
- [], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
+ [], itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
let mayLoad = 1 in
def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
!strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
"$dst, $src1, $src2, $cc}"),
- [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+ [], itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, $src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2, $cc}"),
- [], IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
+ [], itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
let mayLoad = 1 in
def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
!strconcat("vpcmp", Suffix,
"\t{$cc, $src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2, $cc}"),
- [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
+ [], itins.rm>, EVEX_4V, EVEX_K,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
def : Pat<(OpNode (bitconvert (_.LdFrag addr:$src2)),
}
multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
- X86VectorVTInfo _> :
- avx512_icmp_cc<opc, Suffix, OpNode, _> {
+ OpndItins itins, X86VectorVTInfo _> :
+ avx512_icmp_cc<opc, Suffix, OpNode, itins, _> {
def rmib : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
AVX512ICC:$cc),
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
(X86VBroadcast (_.ScalarLdFrag addr:$src2)),
imm:$cc))],
- IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
+ itins.rm>, EVEX_4V, EVEX_B,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
def rmibk : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
_.ScalarMemOp:$src2, AVX512ICC:$cc),
(OpNode (_.VT _.RC:$src1),
(X86VBroadcast (_.ScalarLdFrag addr:$src2)),
imm:$cc)))],
- IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
+ itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
!strconcat("vpcmp", Suffix,
"\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
"$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
- [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
+ [], itins.rm>, EVEX_4V, EVEX_B,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
_.ScalarMemOp:$src2, u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
- [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
+ [], itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
def : Pat<(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
}
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
- AVX512VLVectorVTInfo VTInfo, Predicate prd> {
+ OpndItins itins, AVX512VLVectorVTInfo VTInfo,
+ Predicate prd> {
let Predicates = [prd] in
- defm Z : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info512>, EVEX_V512;
+ defm Z : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info512>,
+ EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info256>, EVEX_V256;
- defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info128>, EVEX_V128;
+ defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info256>,
+ EVEX_V256;
+ defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info128>,
+ EVEX_V128;
}
}
multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode,
- AVX512VLVectorVTInfo VTInfo, Predicate prd> {
+ OpndItins itins, AVX512VLVectorVTInfo VTInfo,
+ Predicate prd> {
let Predicates = [prd] in
- defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info512>,
+ defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info512>,
EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info256>,
+ defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info256>,
EVEX_V256;
- defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info128>,
+ defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info128>,
EVEX_V128;
}
}
-defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, avx512vl_i8_info,
- HasBWI>, EVEX_CD8<8, CD8VF>;
-defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, avx512vl_i8_info,
- HasBWI>, EVEX_CD8<8, CD8VF>;
-
-defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, avx512vl_i16_info,
- HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>;
-defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, avx512vl_i16_info,
- HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>;
-
-defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, avx512vl_i32_info,
- HasAVX512>, EVEX_CD8<32, CD8VF>;
-defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, avx512vl_i32_info,
- HasAVX512>, EVEX_CD8<32, CD8VF>;
-
-defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info,
- HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
- HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
+// FIXME: Is there a better scheduler itinerary for VPCMP/VPCMPU?
+defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, SSE_ALU_F32P,
+ avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>;
+defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, SSE_ALU_F32P,
+ avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>;
+
+defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, SSE_ALU_F32P,
+ avx512vl_i16_info, HasBWI>,
+ VEX_W, EVEX_CD8<16, CD8VF>;
+defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, SSE_ALU_F32P,
+ avx512vl_i16_info, HasBWI>,
+ VEX_W, EVEX_CD8<16, CD8VF>;
+
+defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, SSE_ALU_F32P,
+ avx512vl_i32_info, HasAVX512>,
+ EVEX_CD8<32, CD8VF>;
+defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, SSE_ALU_F32P,
+ avx512vl_i32_info, HasAVX512>,
+ EVEX_CD8<32, CD8VF>;
+
+defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, SSE_ALU_F32P,
+ avx512vl_i64_info, HasAVX512>,
+ VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, SSE_ALU_F32P,
+ avx512vl_i64_info, HasAVX512>,
+ VEX_W, EVEX_CD8<64, CD8VF>;
multiclass avx512_vcmp_common<OpndItins itins, X86VectorVTInfo _> {
; GENERIC-LABEL: vpaddd_mask_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT: vpcmpneqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: vpaddd_maskz_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT: vpcmpneqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: vpaddd_mask_fold_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: vpaddd_mask_broadcast_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: vpaddd_maskz_fold_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: vpaddd_maskz_broadcast_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: test_mask_vaddps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT: vpcmpneqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: test_mask_vmulps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT: vpcmpneqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: test_mask_vminps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT: vpcmpneqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: test_mask_vminpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
+; GENERIC-NEXT: vpcmpneqd %ymm4, %ymm3, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: test_mask_vmaxps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT: vpcmpneqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: test_mask_vmaxpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
+; GENERIC-NEXT: vpcmpneqd %ymm4, %ymm3, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: test_mask_vsubps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT: vpcmpneqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: test_mask_vdivps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT: vpcmpneqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [24:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: test_mask_vaddpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqq %zmm4, %zmm3, %k1
+; GENERIC-NEXT: vpcmpneqq %zmm4, %zmm3, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: test_maskz_vaddpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT: vpcmpneqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: test_mask_fold_vaddpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT: vpcmpneqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: test_maskz_fold_vaddpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT: vpcmpneqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: test_mask_broadcast_vaddpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqq %zmm0, %zmm2, %k1
+; GENERIC-NEXT: vpcmpneqq %zmm0, %zmm2, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [7:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
; GENERIC-LABEL: test_maskz_broadcast_vaddpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT: vpcmpneqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [7:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50]
-; GENERIC-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
+; GENERIC-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50]
-; GENERIC-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
+; GENERIC-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [4:0.50]
; GENERIC-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
; GENERIC-LABEL: sext_8i1_8i32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpled %ymm0, %ymm1, %k0
+; GENERIC-NEXT: vpcmpled %ymm0, %ymm1, %k0 # sched: [3:1.00]
; GENERIC-NEXT: vpmovm2d %k0, %ymm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: mov_test32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: mov_test33:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: mov_test34:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
+; GENERIC-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} {z} # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: mov_test35:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
+; GENERIC-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: mov_test36:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT: vpcmpneqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: mov_test37:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT: vpcmpneqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: mov_test38:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqq %zmm1, %zmm0, %k1
+; GENERIC-NEXT: vpcmpneqq %zmm1, %zmm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} {z} # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: mov_test39:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqq %zmm1, %zmm0, %k1
+; GENERIC-NEXT: vpcmpneqq %zmm1, %zmm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
; GENERIC-LABEL: zext_test1:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
+; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
; GENERIC-NEXT: kshiftlw $10, %k0, %k0
; GENERIC-NEXT: kshiftrw $15, %k0, %k0
; GENERIC-NEXT: kmovd %k0, %eax
define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
; GENERIC-LABEL: zext_test2:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
+; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
; GENERIC-NEXT: kshiftlw $10, %k0, %k0
; GENERIC-NEXT: kshiftrw $15, %k0, %k0
; GENERIC-NEXT: kmovd %k0, %eax
define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
; GENERIC-LABEL: zext_test3:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
+; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
; GENERIC-NEXT: kshiftlw $10, %k0, %k0
; GENERIC-NEXT: kshiftrw $15, %k0, %k0
; GENERIC-NEXT: kmovd %k0, %eax
; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33]
; GENERIC-NEXT: jg .LBB386_1 # sched: [1:1.00]
; GENERIC-NEXT: # %bb.2:
-; GENERIC-NEXT: vpcmpltud %zmm2, %zmm1, %k0
+; GENERIC-NEXT: vpcmpltud %zmm2, %zmm1, %k0 # sched: [3:1.00]
; GENERIC-NEXT: vpmovm2b %k0, %xmm0
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
; GENERIC-LABEL: _ss16xfloat_mask:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT: vpcmpneqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vbroadcastss %xmm0, %zmm1 {%k1}
; GENERIC-NEXT: vmovaps %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
; GENERIC-LABEL: _ss16xfloat_maskz:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: _ss16xfloat_mask_load:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1}
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: _ss16xfloat_maskz_load:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
+; GENERIC-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z}
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: _sd8xdouble_mask:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT: vpcmpneqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
; GENERIC-NEXT: vmovapd %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
; GENERIC-LABEL: _sd8xdouble_maskz:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: _sd8xdouble_mask_load:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1}
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: _sd8xdouble_maskz_load:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpneqd %ymm1, %ymm0, %k1
+; GENERIC-NEXT: vpcmpneqd %ymm1, %ymm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z}
; GENERIC-NEXT: retq # sched: [1:1.00]
;