// Mask-register move forms for one mask register class KRC:
//   kk - register to register (MRMSrcReg), no selection pattern
//   km - load from memory (MRMSrcMem), selected for (vvt (load addr:$src))
//   mk - store to memory (MRMDestMem), selected for (store KRC:$src, addr:$dst)
// The `+` lines attach the IIC_SSE_MOVDQ itinerary to all three defs.
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
string OpcodeStr, RegisterClass KRC,
ValueType vvt, X86MemOperand x86memop> {
// NOTE(review): this `let ... in` has no braces, so it appears to apply only
// to the `kk` def that follows. `km` and `mk` would then receive no SchedRW
// from this multiclass -- confirm whether load/store scheduling classes are
// intended for them.
- let hasSideEffects = 0 in
+ let hasSideEffects = 0, SchedRW = [WriteMove] in
def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
+ IIC_SSE_MOVDQ>;
def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set KRC:$dst, (vvt (load addr:$src)))]>;
+ [(set KRC:$dst, (vvt (load addr:$src)))], IIC_SSE_MOVDQ>;
def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(store KRC:$src, addr:$dst)]>;
+ [(store KRC:$src, addr:$dst)], IIC_SSE_MOVDQ>;
}
// Moves between a mask register class (KRC) and a second register class (GRC):
//   kr - writes KRC:$dst from GRC:$src
//   rk - writes GRC:$dst from KRC:$src
// Neither def has a selection pattern. The `+` lines give both defs the
// IIC_SSE_MOVD_ToGP itinerary and a WriteMove scheduling class.
// NOTE(review): OpcodeStr is used below but does not appear in the parameter
// list as shown here -- presumably a parameter line was elided from this
// excerpt; confirm against the full file.
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
RegisterClass KRC, RegisterClass GRC> {
let hasSideEffects = 0 in {
def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
+ IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
+ IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
}
}
// - KNOT
// One-operand mask instruction: a single `rr` def that applies OpNode to
// KRC:$src and writes KRC:$dst, guarded by the predicate `prd`.
// The `+` lines insert an `OpndItins itins` parameter before `prd`; its
// `rr` field becomes the itinerary and its `Sched` field the scheduling class.
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
RegisterClass KRC, SDPatternOperator OpNode,
- Predicate prd> {
+ OpndItins itins, Predicate prd> {
let Predicates = [prd] in
def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set KRC:$dst, (OpNode KRC:$src))]>;
+ [(set KRC:$dst, (OpNode KRC:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
}
// Instantiates avx512_mask_unop for the four mask widths, appending the
// width letter to the mnemonic:
//   B - VK8,  HasDQI    (VEX, PD)
//   W - VK16, HasAVX512 (VEX, PS)
//   D - VK32, HasBWI    (VEX, PD, VEX_W)
//   Q - VK64, HasBWI    (VEX, PS, VEX_W)
// The `+` lines forward the new `itins` argument unchanged to every width.
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode> {
+ SDPatternOperator OpNode, OpndItins itins> {
defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
- HasDQI>, VEX, PD;
+ itins, HasDQI>, VEX, PD;
defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
- HasAVX512>, VEX, PS;
+ itins, HasAVX512>, VEX, PS;
defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
- HasBWI>, VEX, PD, VEX_W;
+ itins, HasBWI>, VEX, PD, VEX_W;
defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
- HasBWI>, VEX, PS, VEX_W;
+ itins, HasBWI>, VEX, PS, VEX_W;
}
// KNOT{B,W,D,Q}: opcode 0x44, selected for `vnot`. The new form passes
// SSE_BIT_ITINS_P as the itinerary/scheduling bundle.
-defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot>;
+defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SSE_BIT_ITINS_P>;
// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
let Predicates = [HasAVX512, NoDQI] in
// - KAND, KANDN, KOR, KXNOR, KXOR
// Two-operand mask instruction: a single `rr` def that applies OpNode to
// KRC:$src1 and KRC:$src2 and writes KRC:$dst. `prd` guards the def and
// `IsCommutable` is copied into the def's isCommutable field.
// The `+` lines insert an `OpndItins itins` parameter before `prd`; its
// `rr` field becomes the itinerary and its `Sched` field the scheduling class.
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
RegisterClass KRC, SDPatternOperator OpNode,
- Predicate prd, bit IsCommutable> {
+ OpndItins itins, Predicate prd, bit IsCommutable> {
let Predicates = [prd], isCommutable = IsCommutable in
def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>;
+ [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
}
// Instantiates avx512_mask_binop for the four mask widths, appending the
// width letter to the mnemonic:
//   B - VK8,  HasDQI (VEX_4V, VEX_L, PD)
//   W - VK16, prdW   (VEX_4V, VEX_L, PS)
//   D - VK32, HasBWI (VEX_4V, VEX_L, VEX_W, PD)
//   Q - VK64, HasBWI (VEX_4V, VEX_L, VEX_W, PS)
// `prdW` only affects the W form and defaults to HasAVX512.
// The `+` lines insert `itins` between OpNode and IsCommutable and forward it
// to every width, so existing instantiations must pass it positionally.
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode, bit IsCommutable,
- Predicate prdW = HasAVX512> {
+ SDPatternOperator OpNode, OpndItins itins,
+ bit IsCommutable, Predicate prdW = HasAVX512> {
defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
- HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
+ itins, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
- prdW, IsCommutable>, VEX_4V, VEX_L, PS;
+ itins, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
- HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
+ itins, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
- HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
+ itins, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
}
// Pattern fragments for composite logic operations on two operands:
//   andn  - (and (not  $i0), $i1)
//   vandn - (and (vnot $i0), $i1)
//   vxnor - (vnot (xor $i0, $i1))
// Note that the first operand is the one that gets inverted in andn/vandn.
def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
// Two-operand mask logic/arithmetic instantiations. The new forms all pass
// SSE_BIT_ITINS_P as the itinerary bundle. KANDN is the only one instantiated
// with IsCommutable = 0, and KADD is the only one overriding prdW (HasDQI).
-defm KAND : avx512_mask_binop_all<0x41, "kand", and, 1>;
-defm KOR : avx512_mask_binop_all<0x45, "kor", or, 1>;
-defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, 1>;
-defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, 1>;
-defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, 0>;
-defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, 1, HasDQI>;
+defm KAND : avx512_mask_binop_all<0x41, "kand", and, SSE_BIT_ITINS_P, 1>;
+defm KOR : avx512_mask_binop_all<0x45, "kor", or, SSE_BIT_ITINS_P, 1>;
+defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SSE_BIT_ITINS_P, 1>;
+defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SSE_BIT_ITINS_P, 1>;
+defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SSE_BIT_ITINS_P, 0>;
+defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, SSE_BIT_ITINS_P, 1, HasDQI>;
multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
Instruction Inst> {
// Mask unpacking
// Mask unpack: an `rr` def with opcode 0x4b and mnemonic "kunpck"#Suffix that
// takes two KRC sources and writes a KRC destination. The def itself has no
// selection pattern; a separate Pat matches concat_vectors of two KRCSrc
// values producing VT. Everything is guarded by the predicate `prd`.
// The `+` lines insert an `OpndItins itins` parameter before `prd`; its
// `rr` field becomes the itinerary and its `Sched` field the scheduling class.
multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT,
- RegisterClass KRCSrc, Predicate prd> {
+ RegisterClass KRCSrc, OpndItins itins, Predicate prd> {
let Predicates = [prd] in {
let hasSideEffects = 0 in
def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
(ins KRC:$src1, KRC:$src2),
- "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- VEX_4V, VEX_L;
+ "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
+ itins.rr>, VEX_4V, VEX_L, Sched<[itins.Sched]>;
// NOTE(review): the result operands of this Pat are not shown in this
// excerpt (the line after the !cast is elided) -- check the full file for the
// operand order passed to the `rr` instruction.
def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
(!cast<Instruction>(NAME##rr)
}
}
// Mask unpack instantiations; each combines two source masks of class KRCSrc
// into one mask of the next wider class:
//   KUNPCKBW - VK8  -> VK16 (v16i1), HasAVX512
//   KUNPCKWD - VK16 -> VK32 (v32i1), HasBWI
//   KUNPCKDQ - VK32 -> VK64 (v64i1), HasBWI, VEX_W
// The new forms pass SSE_UNPCK as the itinerary bundle.
-defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, HasAVX512>, PD;
-defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, HasBWI>, PS;
-defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, HasBWI>, PS, VEX_W;
+defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, SSE_UNPCK, HasAVX512>, PD;
+defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, SSE_UNPCK, HasBWI>, PS;
+defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, SSE_UNPCK, HasBWI>, PS, VEX_W;
// Mask bit testing
// Mask test instruction: an `rr` def with two KRC inputs and no register
// outputs. It is declared to define EFLAGS, and its pattern sets EFLAGS from
// (OpNode $src1, $src2). The def is guarded by the predicate `prd`.
// The `+` lines insert an `OpndItins itins` parameter before `prd`; its
// `rr` field becomes the itinerary and its `Sched` field the scheduling class.
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
- SDNode OpNode, Predicate prd> {
+ SDNode OpNode, OpndItins itins, Predicate prd> {
let Predicates = [prd], Defs = [EFLAGS] in
def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
- [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>;
+ [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
}
// Instantiates avx512_mask_testop for the four mask widths, appending the
// width letter to the mnemonic:
//   B - VK8,  HasDQI (VEX, PD)
//   W - VK16, prdW   (VEX, PS)
//   Q - VK64, HasBWI (VEX, PS, VEX_W)
//   D - VK32, HasBWI (VEX, PD, VEX_W)
// `prdW` only affects the W form and defaults to HasAVX512.
// The `+` lines insert `itins` before `prdW` and forward it to every width.
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
- Predicate prdW = HasAVX512> {
- defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, HasDQI>,
+ OpndItins itins, Predicate prdW = HasAVX512> {
+ defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, itins, HasDQI>,
VEX, PD;
- defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, prdW>,
+ defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, itins, prdW>,
VEX, PS;
- defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, HasBWI>,
+ defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, itins, HasBWI>,
VEX, PS, VEX_W;
- defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, HasBWI>,
+ defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, itins, HasBWI>,
VEX, PD, VEX_W;
}
// Mask test instantiations: KORTEST (opcode 0x98, X86kortest) keeps the
// default prdW, while KTEST (opcode 0x99, X86ktest) overrides prdW with
// HasDQI. The new forms pass SSE_PTEST as the itinerary bundle.
-defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
-defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, HasDQI>;
+defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SSE_PTEST>;
+defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SSE_PTEST, HasDQI>;
// Mask shift
// Mask shift by immediate: an `ri` def that applies OpNode to KRC:$src and an
// 8-bit immediate ($imm, a u8imm operand) and writes KRC:$dst. The def is
// guarded by HasAVX512 inside this multiclass.
// The `+` lines append an `OpndItins itins` parameter. Its `rr` field is used
// as the itinerary even though this is an immediate form, and its `Sched`
// field as the scheduling class.
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
- SDNode OpNode> {
+ SDNode OpNode, OpndItins itins> {
let Predicates = [HasAVX512] in
def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
!strconcat(OpcodeStr,
"\t{$imm, $src, $dst|$dst, $src, $imm}"),
- [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>;
+ [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))],
+ itins.rr>, Sched<[itins.Sched]>;
}
// Instantiates avx512_mask_shiftop for the four mask widths, appending the
// width letter to the mnemonic. W and B share opc1; Q and D share opc2.
// W and Q carry VEX_W; B and D do not.
//   W - VK16, no extra predicate wrapper here
//   B - VK8,  wrapped in Predicates = [HasDQI]
//   Q - VK64, wrapped in Predicates = [HasBWI]
//   D - VK32, wrapped in Predicates = [HasBWI]
// NOTE(review): avx512_mask_shiftop also sets Predicates = [HasAVX512] on its
// `ri` def. Confirm which of the nested `let Predicates` bindings takes
// effect for B/Q/D, i.e. that HasDQI/HasBWI is not overridden by HasAVX512.
// The `+` lines append `itins` and forward it unchanged to every width.
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
- SDNode OpNode> {
- defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
- VEX, TAPD, VEX_W;
+ SDNode OpNode, OpndItins itins> {
+ defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
+ itins>, VEX, TAPD, VEX_W;
let Predicates = [HasDQI] in
- defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode>,
- VEX, TAPD;
+ defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
+ itins>, VEX, TAPD;
let Predicates = [HasBWI] in {
- defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode>,
- VEX, TAPD, VEX_W;
- defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode>,
- VEX, TAPD;
+ defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
+ itins>, VEX, TAPD, VEX_W;
+ defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
+ itins>, VEX, TAPD;
}
}
// Mask shift instantiations: KSHIFTL uses opcodes 0x32 (W/B) and 0x33 (Q/D)
// with X86kshiftl; KSHIFTR uses 0x30 (W/B) and 0x31 (Q/D) with X86kshiftr.
// The new forms pass SSE_PSHUF as the itinerary bundle.
-defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl>;
-defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr>;
+defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, SSE_PSHUF>;
+defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, SSE_PSHUF>;
multiclass axv512_icmp_packed_no_vlx_lowering<SDNode OpNode, string InstStr> {
def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
// Mask setting all 0s or 1s
// Pseudo-instruction that materializes the constant mask value `Val` (of type
// VT) into KRC:$dst. It has opcode 0, the Pseudo format, no inputs and an
// empty assembly string, and is guarded by HasAVX512. It is flagged as
// rematerializable, as cheap as a move, and a pseudo.
// The `+` lines additionally assign it the WriteZero scheduling class.
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
let Predicates = [HasAVX512] in
- let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1 in
+ let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
+ SchedRW = [WriteZero] in
def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
[(set KRC:$dst, (VT Val))]>;
}
; GENERIC-LABEL: test3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vcmpeqss %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: movzbl %al, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <8 x double> @i32to8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
; GENERIC-LABEL: i32to8f64_mask:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <8 x double> @sito8f64_maskz(<8 x i32> %a, i8 %b) nounwind {
; GENERIC-LABEL: sito8f64_maskz:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <8 x double> @uito8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
; GENERIC-LABEL: uito8f64_mask:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <8 x double> @uito8f64_maskz(<8 x i32> %a, i8 %b) nounwind {
; GENERIC-LABEL: uito8f64_maskz:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-NEXT: movl {{.*}}(%rip), %eax # sched: [5:0.50]
; GENERIC-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: kshiftrw $8, %k1, %k1
+; GENERIC-NEXT: kshiftrw $8, %k1, %k1 # sched: [1:1.00]
; GENERIC-NEXT: vpbroadcastd %eax, %ymm1 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
; GENERIC-LABEL: zext_16i1_to_16xi32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
; GENERIC-LABEL: zext_8i1_to_8xi64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %ax<def> %ax<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %ax<def> %ax<kill> %eax<kill>
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %al<def> %al<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: trunc_i32_to_i1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movw $-4, %ax # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %eax, %k0
-; GENERIC-NEXT: kshiftrw $1, %k0, %k0
-; GENERIC-NEXT: kshiftlw $1, %k0, %k0
+; GENERIC-NEXT: kmovd %eax, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: kshiftrw $1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftlw $1, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33]
-; GENERIC-NEXT: kmovw %edi, %k1
-; GENERIC-NEXT: korw %k1, %k0, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovw %edi, %k1 # sched: [1:0.33]
+; GENERIC-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %ax<def> %ax<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-NEXT: vpsllw $7, %zmm2, %zmm2 # sched: [3:1.00]
; GENERIC-NEXT: vpmovb2m %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
-; GENERIC-NEXT: kshiftrq $32, %k1, %k1
+; GENERIC-NEXT: kshiftrq $32, %k1, %k1 # sched: [1:1.00]
; GENERIC-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z} # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <16 x float> @masked_and_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
; GENERIC-LABEL: masked_and_v16f32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
define <16 x float> @masked_or_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
; GENERIC-LABEL: masked_or_v16f32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
define <16 x float> @masked_xor_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
; GENERIC-LABEL: masked_xor_v16f32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
define <8 x double> @masked_and_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
; GENERIC-LABEL: masked_and_v8f64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
define <8 x double> @masked_or_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
; GENERIC-LABEL: masked_or_v8f64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
define <8 x double> @masked_xor_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
; GENERIC-LABEL: masked_xor_v8f64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
define <8 x i64> @test_mm512_mask_and_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
; GENERIC-LABEL: test_mm512_mask_and_epi32:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <8 x i64> @test_mm512_mask_or_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
; GENERIC-LABEL: test_mm512_mask_or_epi32:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <8 x i64> @test_mm512_mask_xor_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
; GENERIC-LABEL: test_mm512_mask_xor_epi32:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <8 x double> @test_mm512_mask_xor_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; GENERIC-LABEL: test_mm512_mask_xor_pd:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <8 x double> @test_mm512_maskz_xor_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; GENERIC-LABEL: test_mm512_maskz_xor_pd:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <16 x float> @test_mm512_mask_xor_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; GENERIC-LABEL: test_mm512_mask_xor_ps:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <16 x float> @test_mm512_maskz_xor_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; GENERIC-LABEL: test_mm512_maskz_xor_ps:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <8 x double> @test_mm512_mask_or_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; GENERIC-LABEL: test_mm512_mask_or_pd:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <8 x double> @test_mm512_maskz_or_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; GENERIC-LABEL: test_mm512_maskz_or_pd:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <16 x float> @test_mm512_mask_or_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; GENERIC-LABEL: test_mm512_mask_or_ps:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <16 x float> @test_mm512_maskz_or_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; GENERIC-LABEL: test_mm512_maskz_or_ps:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <8 x double> @test_mm512_mask_and_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; GENERIC-LABEL: test_mm512_mask_and_pd:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <8 x double> @test_mm512_maskz_and_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; GENERIC-LABEL: test_mm512_maskz_and_pd:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <16 x float> @test_mm512_mask_and_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; GENERIC-LABEL: test_mm512_mask_and_ps:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <16 x float> @test_mm512_maskz_and_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; GENERIC-LABEL: test_mm512_maskz_and_ps:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <8 x double> @test_mm512_mask_andnot_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; GENERIC-LABEL: test_mm512_mask_andnot_pd:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <8 x double> @test_mm512_maskz_andnot_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; GENERIC-LABEL: test_mm512_maskz_andnot_pd:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <16 x float> @test_mm512_mask_andnot_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; GENERIC-LABEL: test_mm512_mask_andnot_ps:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define <16 x float> @test_mm512_maskz_andnot_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; GENERIC-LABEL: test_mm512_maskz_andnot_ps:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define i16 @mask16(i16 %x) {
; GENERIC-LABEL: mask16:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0
-; GENERIC-NEXT: knotw %k0, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %ax<def> %ax<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define i32 @mask16_zext(i16 %x) {
; GENERIC-LABEL: mask16_zext:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0
-; GENERIC-NEXT: knotw %k0, %k0
-; GENERIC-NEXT: kmovw %k0, %eax
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovw %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: mask16_zext:
define i8 @mask8(i8 %x) {
; GENERIC-LABEL: mask8:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0
-; GENERIC-NEXT: knotb %k0, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %al<def> %al<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define i32 @mask8_zext(i8 %x) {
; GENERIC-LABEL: mask8_zext:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0
-; GENERIC-NEXT: knotb %k0, %k0
-; GENERIC-NEXT: kmovb %k0, %eax
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovb %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: mask8_zext:
; GENERIC-LABEL: mask16_mem:
; GENERIC: # %bb.0:
; GENERIC-NEXT: kmovw (%rdi), %k0
-; GENERIC-NEXT: knotw %k0, %k0
+; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovw %k0, (%rdi)
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: mask8_mem:
; GENERIC: # %bb.0:
; GENERIC-NEXT: kmovb (%rdi), %k0
-; GENERIC-NEXT: knotb %k0, %k0
+; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovb %k0, (%rdi)
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # %bb.0:
; GENERIC-NEXT: kmovw (%rdi), %k0
; GENERIC-NEXT: kmovw (%rsi), %k1
-; GENERIC-NEXT: kandw %k1, %k0, %k2
-; GENERIC-NEXT: kxorw %k1, %k0, %k0
-; GENERIC-NEXT: korw %k0, %k2, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kandw %k1, %k0, %k2 # sched: [1:1.00]
+; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: korw %k0, %k2, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %ax<def> %ax<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define i8 @shuf_test1(i16 %v) nounwind {
; GENERIC-LABEL: shuf_test1:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0
-; GENERIC-NEXT: kshiftrw $8, %k0, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: kshiftrw $8, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %al<def> %al<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: zext_test1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: kshiftlw $10, %k0, %k0
-; GENERIC-NEXT: kshiftrw $15, %k0, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kshiftlw $10, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftrw $15, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: andl $1, %eax # sched: [1:0.33]
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
; GENERIC-LABEL: zext_test2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: kshiftlw $10, %k0, %k0
-; GENERIC-NEXT: kshiftrw $15, %k0, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kshiftlw $10, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftrw $15, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: andl $1, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %ax<def> %ax<kill> %eax<kill>
; GENERIC-NEXT: vzeroupper
; GENERIC-LABEL: zext_test3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: kshiftlw $10, %k0, %k0
-; GENERIC-NEXT: kshiftrw $15, %k0, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kshiftlw $10, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftrw $15, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: andb $1, %al # sched: [1:0.33]
; GENERIC-NEXT: # kill: %al<def> %al<kill> %eax<kill>
; GENERIC-NEXT: vzeroupper
define i8 @conv1(<8 x i1>* %R) {
; GENERIC-LABEL: conv1:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kxnorw %k0, %k0, %k0
+; GENERIC-NEXT: kxnorw %k0, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovb %k0, (%rdi)
; GENERIC-NEXT: movb $-2, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
; GENERIC-NEXT: movb $-2, %al # sched: [1:0.33]
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpgtq %ymm3, %ymm2, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: kandnw %k0, %k1, %k0
+; GENERIC-NEXT: kandnw %k0, %k1, %k0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpgtq %xmm3, %xmm2, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: kandnw %k1, %k0, %k0
+; GENERIC-NEXT: kandnw %k1, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33]
; GENERIC-NEXT: movb $85, %al # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %eax, %k1
-; GENERIC-NEXT: korb %k1, %k0, %k0
-; GENERIC-NEXT: ktestb %k0, %k0
+; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
+; GENERIC-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: ktestb %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vcmp_test7:
; GENERIC-NEXT: # sched: [1:0.33]
; GENERIC-NEXT: movw $1, %cx # sched: [1:0.33]
; GENERIC-NEXT: cmovgw %ax, %cx # sched: [2:0.67]
-; GENERIC-NEXT: kmovd %ecx, %k0
+; GENERIC-NEXT: kmovd %ecx, %k0 # sched: [1:0.33]
; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
;
; GENERIC-LABEL: vmov_test16:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovq %rdi, %k0
+; GENERIC-NEXT: kmovq %rdi, %k0 # sched: [1:0.33]
; GENERIC-NEXT: movb $1, %al # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %eax, %k1
+; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovm2b %k1, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: vpsllq $40, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovm2b %k0, %zmm1 # sched: [1:0.33]
; GENERIC-NEXT: movl $32, %eax # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %eax, %k1
+; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [2:1.00]
; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:0.33]
;
; GENERIC-LABEL: vmov_test17:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovq %rdi, %k0
+; GENERIC-NEXT: kmovq %rdi, %k0 # sched: [1:0.33]
; GENERIC-NEXT: cmpl %edx, %esi # sched: [1:0.33]
; GENERIC-NEXT: setg %al # sched: [1:0.50]
-; GENERIC-NEXT: kmovd %eax, %k1
+; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovm2b %k1, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: vpsllq $40, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovm2b %k0, %zmm1 # sched: [1:0.33]
; GENERIC-NEXT: movl $32, %eax # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %eax, %k1
+; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [2:1.00]
; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:0.33]
define <8 x i1> @vmov_test18(i8 %a, i16 %y) {
; GENERIC-LABEL: vmov_test18:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: kmovd %esi, %k2
-; GENERIC-NEXT: kshiftlw $7, %k2, %k0
-; GENERIC-NEXT: kshiftrw $15, %k0, %k0
-; GENERIC-NEXT: kshiftlw $6, %k2, %k2
-; GENERIC-NEXT: kshiftrw $15, %k2, %k2
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
+; GENERIC-NEXT: kmovd %esi, %k2 # sched: [1:0.33]
+; GENERIC-NEXT: kshiftlw $7, %k2, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftrw $15, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftlw $6, %k2, %k2 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftrw $15, %k2, %k2 # sched: [1:1.00]
; GENERIC-NEXT: vpmovm2q %k1, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: vpmovm2q %k2, %zmm1 # sched: [1:0.33]
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7] sched: [4:0.50]
; GENERIC-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 # sched: [1:1.00]
; GENERIC-NEXT: vpmovq2m %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: kshiftlb $1, %k1, %k1
-; GENERIC-NEXT: kshiftrb $1, %k1, %k1
-; GENERIC-NEXT: kshiftlb $7, %k0, %k0
-; GENERIC-NEXT: korb %k0, %k1, %k0
+; GENERIC-NEXT: kshiftlb $1, %k1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftrb $1, %k1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftlb $7, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: korb %k0, %k1, %k0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) {
; GENERIC-LABEL: store_v1i1:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0
-; GENERIC-NEXT: kxnorw %k0, %k0, %k1
-; GENERIC-NEXT: kxorw %k1, %k0, %k0
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: kxnorw %k0, %k0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovb %k0, (%rsi)
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: knotw %k0, %k0
+; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovb %k0, (%rdi)
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: knotw %k0, %k0
+; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovb %k0, (%rdi)
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: knotb %k0, %k0
+; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovb %k0, (%rdi)
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: knotw %k0, %k0
+; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovw %k0, (%rdi)
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # %bb.0:
; GENERIC-NEXT: movl $1497715861, %eax # imm = 0x59455495
; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %eax, %k1
+; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [4:0.50]
; GENERIC-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00]
-; GENERIC-NEXT: ktestb %k0, %k0
+; GENERIC-NEXT: ktestb %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: je .LBB410_2 # sched: [1:1.00]
; GENERIC-NEXT: # %bb.1: # %L1
; GENERIC-NEXT: vmovapd %zmm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: vmovups 64(%rdi), %zmm3 # sched: [4:0.50]
; GENERIC-NEXT: vcmpltps %zmm0, %zmm2, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vcmpltps %zmm1, %zmm3, %k2 # sched: [3:1.00]
-; GENERIC-NEXT: kunpckwd %k1, %k2, %k0
+; GENERIC-NEXT: kunpckwd %k1, %k2, %k0 # sched: [1:1.00]
; GENERIC-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [4:0.50]
; GENERIC-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: [4:0.50]
; GENERIC-NEXT: vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00]
-; GENERIC-NEXT: kunpckwd %k1, %k2, %k1
-; GENERIC-NEXT: kord %k1, %k0, %k0
-; GENERIC-NEXT: ktestd %k0, %k0
+; GENERIC-NEXT: kunpckwd %k1, %k2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT: kord %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: ktestd %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: je .LBB411_2 # sched: [1:1.00]
; GENERIC-NEXT: # %bb.1: # %L1
; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: kmovb %k0, %eax
+; GENERIC-NEXT: kmovb %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: addl %eax, %eax # sched: [1:0.33]
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: kmovw %k0, %eax
+; GENERIC-NEXT: kmovw %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: addl %eax, %eax # sched: [1:0.33]
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
define i16 @test_v16i1_add(i16 %x, i16 %y) {
; GENERIC-LABEL: test_v16i1_add:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0
-; GENERIC-NEXT: kmovd %esi, %k1
-; GENERIC-NEXT: kxorw %k1, %k0, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
+; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %ax<def> %ax<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define i16 @test_v16i1_sub(i16 %x, i16 %y) {
; GENERIC-LABEL: test_v16i1_sub:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0
-; GENERIC-NEXT: kmovd %esi, %k1
-; GENERIC-NEXT: kxorw %k1, %k0, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
+; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %ax<def> %ax<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define i16 @test_v16i1_mul(i16 %x, i16 %y) {
; GENERIC-LABEL: test_v16i1_mul:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0
-; GENERIC-NEXT: kmovd %esi, %k1
-; GENERIC-NEXT: kandw %k1, %k0, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
+; GENERIC-NEXT: kandw %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %ax<def> %ax<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define i8 @test_v8i1_add(i8 %x, i8 %y) {
; GENERIC-LABEL: test_v8i1_add:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0
-; GENERIC-NEXT: kmovd %esi, %k1
-; GENERIC-NEXT: kxorb %k1, %k0, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
+; GENERIC-NEXT: kxorb %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %al<def> %al<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define i8 @test_v8i1_sub(i8 %x, i8 %y) {
; GENERIC-LABEL: test_v8i1_sub:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0
-; GENERIC-NEXT: kmovd %esi, %k1
-; GENERIC-NEXT: kxorb %k1, %k0, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
+; GENERIC-NEXT: kxorb %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %al<def> %al<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
;
define i8 @test_v8i1_mul(i8 %x, i8 %y) {
; GENERIC-LABEL: test_v8i1_mul:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0
-; GENERIC-NEXT: kmovd %esi, %k1
-; GENERIC-NEXT: kandb %k1, %k0, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
+; GENERIC-NEXT: kandb %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %al<def> %al<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00]
; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: knotw %k0, %k1
+; GENERIC-NEXT: knotw %k0, %k1 # sched: [1:1.00]
; GENERIC-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;