SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
}
-multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
- PatFrag OpNode_su, X86FoldableSchedWrite sched,
+// Defines the rr, rm, rrk and rmk forms of a packed integer compare whose
+// result is written to a mask register (_.KRC). The "k" forms take an extra
+// _.KRCWM:$mask input. Every def has an empty pattern list, so mayLoad and
+// hasSideEffects are spelled out explicitly on each def.
+multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
+ X86FoldableSchedWrite sched,
X86VectorVTInfo _, bit IsCommutable> {
- let isCommutable = IsCommutable in
+ let isCommutable = IsCommutable, hasSideEffects = 0 in
def rr : AVX512BI<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))]>,
- EVEX_4V, Sched<[sched]>;
+ []>, EVEX_4V, Sched<[sched]>;
+ let mayLoad = 1, hasSideEffects = 0 in
def rm : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
- (_.VT (_.LdFrag addr:$src2))))]>,
- EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
- let isCommutable = IsCommutable in
+ []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ let isCommutable = IsCommutable, hasSideEffects = 0 in
def rrk : AVX512BI<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
- [(set _.KRC:$dst, (and _.KRCWM:$mask,
- (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2))))]>,
- EVEX_4V, EVEX_K, Sched<[sched]>;
+ []>, EVEX_4V, EVEX_K, Sched<[sched]>;
+ let mayLoad = 1, hasSideEffects = 0 in
def rmk : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
- [(set _.KRC:$dst, (and _.KRCWM:$mask,
- (OpNode_su (_.VT _.RC:$src1),
- (_.VT (_.LdFrag addr:$src2)))))]>,
- EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
-multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
- PatFrag OpNode_su,
+// Extends avx512_icmp_packed with the broadcast-memory forms rmb and rmbk
+// (EVEX_B, _.ScalarMemOp source). Both defs have empty pattern lists and are
+// marked mayLoad = 1, hasSideEffects = 0.
+multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
bit IsCommutable> :
- avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched, _, IsCommutable> {
+ avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
+ let mayLoad = 1, hasSideEffects = 0 in {
def rmb : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
!strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
"|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
- [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
- (_.BroadcastLdFrag addr:$src2)))]>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbk : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
_.ScalarMemOp:$src2),
!strconcat(OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
- [(set _.KRC:$dst, (and _.KRCWM:$mask,
- (OpNode_su (_.VT _.RC:$src1),
- (_.BroadcastLdFrag addr:$src2))))]>,
- EVEX_4V, EVEX_K, EVEX_B,
+ []>, EVEX_4V, EVEX_K, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
+ }
}
-multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
- PatFrag OpNode_su, X86SchedWriteWidths sched,
+// Instantiates avx512_icmp_packed once per vector width: Z (info512) under
+// [prd], and Z256/Z128 (info256/info128) under [prd, HasVLX].
+multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
+ X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo, Predicate prd,
bit IsCommutable = 0> {
let Predicates = [prd] in
- defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.ZMM,
+ defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
VTInfo.info512, IsCommutable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.YMM,
+ defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
VTInfo.info256, IsCommutable>, EVEX_V256;
- defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.XMM,
+ defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
VTInfo.info128, IsCommutable>, EVEX_V128;
}
}
+// Instantiates avx512_icmp_packed_rmb (which adds the broadcast forms) once
+// per vector width: Z (info512) under [prd], and Z256/Z128 under
+// [prd, HasVLX].
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
- PatFrag OpNode, PatFrag OpNode_su,
X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo,
Predicate prd, bit IsCommutable = 0> {
let Predicates = [prd] in
- defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.ZMM,
+ defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
VTInfo.info512, IsCommutable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.YMM,
+ defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
VTInfo.info256, IsCommutable>, EVEX_V256;
- defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.XMM,
+ defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
VTInfo.info128, IsCommutable>, EVEX_V128;
}
}
// This node treats ISD::SETCC as commutable to help match loads in both
// operands for PCMPEQ.
def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
-def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2),
- (X86setcc_commute node:$src1, node:$src2, SETEQ)>;
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
(setcc node:$src1, node:$src2, SETGT)>;
-def X86pcmpeqm_c_su : PatFrag<(ops node:$src1, node:$src2),
- (X86pcmpeqm_c node:$src1, node:$src2), [{
- return N->hasOneUse();
-}]>;
-def X86pcmpgtm_su : PatFrag<(ops node:$src1, node:$src2),
- (X86pcmpgtm node:$src1, node:$src2), [{
- return N->hasOneUse();
-}]>;
-
// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
// increase the pattern complexity the way an immediate would.
+// NOTE(review): the multiclasses instantiated below now define their
+// instructions with empty pattern lists, so this AddedComplexity presumably
+// has nothing left to act on -- confirm and consider dropping the wrapper.
let AddedComplexity = 2 in {
// FIXME: Is there a better scheduler class for VPCMP?
-defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c, X86pcmpeqm_c_su,
+defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
EVEX_CD8<8, CD8VF>, VEX_WIG;
-defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c, X86pcmpeqm_c_su,
+defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
EVEX_CD8<16, CD8VF>, VEX_WIG;
-defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c, X86pcmpeqm_c_su,
+defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
EVEX_CD8<32, CD8VF>;
-defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c, X86pcmpeqm_c_su,
+defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm, X86pcmpgtm_su,
+defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
EVEX_CD8<8, CD8VF>, VEX_WIG;
-defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm, X86pcmpgtm_su,
+defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
EVEX_CD8<16, CD8VF>, VEX_WIG;
-defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm, X86pcmpgtm_su,
+defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
EVEX_CD8<32, CD8VF>;
-defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm, X86pcmpgtm_su,
+defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
-// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
-multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
- string InstStr,
- X86VectorVTInfo Narrow,
- X86VectorVTInfo Wide> {
- def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
- (Narrow.VT Narrow.RC:$src2))),
- (COPY_TO_REGCLASS
- (!cast<Instruction>(InstStr#"Zrr")
- (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
- (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
- Narrow.KRC)>;
-
- def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
- (Frag_su (Narrow.VT Narrow.RC:$src1),
- (Narrow.VT Narrow.RC:$src2)))),
- (COPY_TO_REGCLASS
- (!cast<Instruction>(InstStr#"Zrrk")
- (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
- (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
- (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
- Narrow.KRC)>;
-}
-
-multiclass axv512_icmp_packed_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
- string InstStr,
- X86VectorVTInfo Narrow,
- X86VectorVTInfo Wide> {
- // Broadcast load.
- def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
- (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)))),
- (COPY_TO_REGCLASS
- (!cast<Instruction>(InstStr#"Zrmb")
- (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
- addr:$src2),
- Narrow.KRC)>;
-
- def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
- (Frag_su (Narrow.VT Narrow.RC:$src1),
- (Narrow.BroadcastLdFrag addr:$src2)))),
- (COPY_TO_REGCLASS
- (!cast<Instruction>(InstStr#"Zrmbk")
- (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
- (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
- addr:$src2),
- Narrow.KRC)>;
-}
-
// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
string InstStr,
}
let Predicates = [HasAVX512, NoVLX] in {
- // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
- // increase the pattern complexity the way an immediate would.
- let AddedComplexity = 2 in {
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v8i32x_info, v16i32_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v8i32x_info, v16i32_info>;
-
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v4i32x_info, v16i32_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v4i32x_info, v16i32_info>;
-
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v4i64x_info, v8i64_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v4i64x_info, v8i64_info>;
-
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v2i64x_info, v8i64_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v2i64x_info, v8i64_info>;
-
- defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v8i32x_info, v16i32_info>;
- defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v8i32x_info, v16i32_info>;
-
- defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v4i32x_info, v16i32_info>;
- defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v4i32x_info, v16i32_info>;
-
- defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v4i64x_info, v8i64_info>;
- defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v4i64x_info, v8i64_info>;
-
- defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v2i64x_info, v8i64_info>;
- defm : axv512_icmp_packed_rmb_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v2i64x_info, v8i64_info>;
- }
-
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
}
let Predicates = [HasBWI, NoVLX] in {
- // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
- // increase the pattern complexity the way an immediate would.
- let AddedComplexity = 2 in {
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTB", v32i8x_info, v64i8_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQB", v32i8x_info, v64i8_info>;
-
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTB", v16i8x_info, v64i8_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQB", v16i8x_info, v64i8_info>;
-
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTW", v16i16x_info, v32i16_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQW", v16i16x_info, v32i16_info>;
-
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTW", v8i16x_info, v32i16_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQW", v8i16x_info, v32i16_info>;
- }
-
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
break;
}
+ case X86::VPCMPBZ128rmi: case X86::VPCMPBZ128rmik:
+ case X86::VPCMPBZ128rri: case X86::VPCMPBZ128rrik:
+ case X86::VPCMPBZ256rmi: case X86::VPCMPBZ256rmik:
+ case X86::VPCMPBZ256rri: case X86::VPCMPBZ256rrik:
+ case X86::VPCMPBZrmi: case X86::VPCMPBZrmik:
+ case X86::VPCMPBZrri: case X86::VPCMPBZrrik:
+ case X86::VPCMPDZ128rmi: case X86::VPCMPDZ128rmik:
+ case X86::VPCMPDZ128rmib: case X86::VPCMPDZ128rmibk:
+ case X86::VPCMPDZ128rri: case X86::VPCMPDZ128rrik:
+ case X86::VPCMPDZ256rmi: case X86::VPCMPDZ256rmik:
+ case X86::VPCMPDZ256rmib: case X86::VPCMPDZ256rmibk:
+ case X86::VPCMPDZ256rri: case X86::VPCMPDZ256rrik:
+ case X86::VPCMPDZrmi: case X86::VPCMPDZrmik:
+ case X86::VPCMPDZrmib: case X86::VPCMPDZrmibk:
+ case X86::VPCMPDZrri: case X86::VPCMPDZrrik:
+ case X86::VPCMPQZ128rmi: case X86::VPCMPQZ128rmik:
+ case X86::VPCMPQZ128rmib: case X86::VPCMPQZ128rmibk:
+ case X86::VPCMPQZ128rri: case X86::VPCMPQZ128rrik:
+ case X86::VPCMPQZ256rmi: case X86::VPCMPQZ256rmik:
+ case X86::VPCMPQZ256rmib: case X86::VPCMPQZ256rmibk:
+ case X86::VPCMPQZ256rri: case X86::VPCMPQZ256rrik:
+ case X86::VPCMPQZrmi: case X86::VPCMPQZrmik:
+ case X86::VPCMPQZrmib: case X86::VPCMPQZrmibk:
+ case X86::VPCMPQZrri: case X86::VPCMPQZrrik:
+ case X86::VPCMPWZ128rmi: case X86::VPCMPWZ128rmik:
+ case X86::VPCMPWZ128rri: case X86::VPCMPWZ128rrik:
+ case X86::VPCMPWZ256rmi: case X86::VPCMPWZ256rmik:
+ case X86::VPCMPWZ256rri: case X86::VPCMPWZ256rrik:
+ case X86::VPCMPWZrmi: case X86::VPCMPWZrmik:
+ case X86::VPCMPWZrri: case X86::VPCMPWZrrik: {
+   // The comparison predicate is the last operand of these VPCMP forms. When
+   // it is 0 or 6 the instruction is rewritten to the matching VPCMPEQ or
+   // VPCMPGT opcode and the immediate operand is dropped; any other
+   // predicate leaves OutMI untouched.
+   //
+   // Each inner switch must list exactly the opcodes of the enclosing case
+   // labels; the default arms make a missing entry a hard error instead of
+   // leaving NewOpc uninitialized.
+
+   // Turn immediate 0 into the VPCMPEQ instruction.
+   if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 0) {
+     unsigned NewOpc;
+     switch (OutMI.getOpcode()) {
+     default: llvm_unreachable("Invalid opcode");
+     case X86::VPCMPBZ128rmi: NewOpc = X86::VPCMPEQBZ128rm; break;
+     case X86::VPCMPBZ128rmik: NewOpc = X86::VPCMPEQBZ128rmk; break;
+     case X86::VPCMPBZ128rri: NewOpc = X86::VPCMPEQBZ128rr; break;
+     case X86::VPCMPBZ128rrik: NewOpc = X86::VPCMPEQBZ128rrk; break;
+     case X86::VPCMPBZ256rmi: NewOpc = X86::VPCMPEQBZ256rm; break;
+     case X86::VPCMPBZ256rmik: NewOpc = X86::VPCMPEQBZ256rmk; break;
+     case X86::VPCMPBZ256rri: NewOpc = X86::VPCMPEQBZ256rr; break;
+     case X86::VPCMPBZ256rrik: NewOpc = X86::VPCMPEQBZ256rrk; break;
+     case X86::VPCMPBZrmi: NewOpc = X86::VPCMPEQBZrm; break;
+     case X86::VPCMPBZrmik: NewOpc = X86::VPCMPEQBZrmk; break;
+     case X86::VPCMPBZrri: NewOpc = X86::VPCMPEQBZrr; break;
+     case X86::VPCMPBZrrik: NewOpc = X86::VPCMPEQBZrrk; break;
+     case X86::VPCMPDZ128rmi: NewOpc = X86::VPCMPEQDZ128rm; break;
+     case X86::VPCMPDZ128rmib: NewOpc = X86::VPCMPEQDZ128rmb; break;
+     case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPEQDZ128rmbk; break;
+     case X86::VPCMPDZ128rmik: NewOpc = X86::VPCMPEQDZ128rmk; break;
+     case X86::VPCMPDZ128rri: NewOpc = X86::VPCMPEQDZ128rr; break;
+     case X86::VPCMPDZ128rrik: NewOpc = X86::VPCMPEQDZ128rrk; break;
+     case X86::VPCMPDZ256rmi: NewOpc = X86::VPCMPEQDZ256rm; break;
+     case X86::VPCMPDZ256rmib: NewOpc = X86::VPCMPEQDZ256rmb; break;
+     case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPEQDZ256rmbk; break;
+     case X86::VPCMPDZ256rmik: NewOpc = X86::VPCMPEQDZ256rmk; break;
+     case X86::VPCMPDZ256rri: NewOpc = X86::VPCMPEQDZ256rr; break;
+     case X86::VPCMPDZ256rrik: NewOpc = X86::VPCMPEQDZ256rrk; break;
+     case X86::VPCMPDZrmi: NewOpc = X86::VPCMPEQDZrm; break;
+     case X86::VPCMPDZrmib: NewOpc = X86::VPCMPEQDZrmb; break;
+     case X86::VPCMPDZrmibk: NewOpc = X86::VPCMPEQDZrmbk; break;
+     case X86::VPCMPDZrmik: NewOpc = X86::VPCMPEQDZrmk; break;
+     case X86::VPCMPDZrri: NewOpc = X86::VPCMPEQDZrr; break;
+     case X86::VPCMPDZrrik: NewOpc = X86::VPCMPEQDZrrk; break;
+     case X86::VPCMPQZ128rmi: NewOpc = X86::VPCMPEQQZ128rm; break;
+     case X86::VPCMPQZ128rmib: NewOpc = X86::VPCMPEQQZ128rmb; break;
+     case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPEQQZ128rmbk; break;
+     case X86::VPCMPQZ128rmik: NewOpc = X86::VPCMPEQQZ128rmk; break;
+     case X86::VPCMPQZ128rri: NewOpc = X86::VPCMPEQQZ128rr; break;
+     case X86::VPCMPQZ128rrik: NewOpc = X86::VPCMPEQQZ128rrk; break;
+     case X86::VPCMPQZ256rmi: NewOpc = X86::VPCMPEQQZ256rm; break;
+     case X86::VPCMPQZ256rmib: NewOpc = X86::VPCMPEQQZ256rmb; break;
+     case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPEQQZ256rmbk; break;
+     case X86::VPCMPQZ256rmik: NewOpc = X86::VPCMPEQQZ256rmk; break;
+     case X86::VPCMPQZ256rri: NewOpc = X86::VPCMPEQQZ256rr; break;
+     case X86::VPCMPQZ256rrik: NewOpc = X86::VPCMPEQQZ256rrk; break;
+     case X86::VPCMPQZrmi: NewOpc = X86::VPCMPEQQZrm; break;
+     case X86::VPCMPQZrmib: NewOpc = X86::VPCMPEQQZrmb; break;
+     case X86::VPCMPQZrmibk: NewOpc = X86::VPCMPEQQZrmbk; break;
+     case X86::VPCMPQZrmik: NewOpc = X86::VPCMPEQQZrmk; break;
+     case X86::VPCMPQZrri: NewOpc = X86::VPCMPEQQZrr; break;
+     case X86::VPCMPQZrrik: NewOpc = X86::VPCMPEQQZrrk; break;
+     case X86::VPCMPWZ128rmi: NewOpc = X86::VPCMPEQWZ128rm; break;
+     case X86::VPCMPWZ128rmik: NewOpc = X86::VPCMPEQWZ128rmk; break;
+     case X86::VPCMPWZ128rri: NewOpc = X86::VPCMPEQWZ128rr; break;
+     case X86::VPCMPWZ128rrik: NewOpc = X86::VPCMPEQWZ128rrk; break;
+     case X86::VPCMPWZ256rmi: NewOpc = X86::VPCMPEQWZ256rm; break;
+     case X86::VPCMPWZ256rmik: NewOpc = X86::VPCMPEQWZ256rmk; break;
+     case X86::VPCMPWZ256rri: NewOpc = X86::VPCMPEQWZ256rr; break;
+     case X86::VPCMPWZ256rrik: NewOpc = X86::VPCMPEQWZ256rrk; break;
+     case X86::VPCMPWZrmi: NewOpc = X86::VPCMPEQWZrm; break;
+     case X86::VPCMPWZrmik: NewOpc = X86::VPCMPEQWZrmk; break;
+     case X86::VPCMPWZrri: NewOpc = X86::VPCMPEQWZrr; break;
+     case X86::VPCMPWZrrik: NewOpc = X86::VPCMPEQWZrrk; break;
+     }
+
+     // Switch to the EQ opcode and drop the now-implicit predicate immediate.
+     OutMI.setOpcode(NewOpc);
+     OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
+     break;
+   }
+
+   // Turn immediate 6 into the VPCMPGT instruction.
+   if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 6) {
+     unsigned NewOpc;
+     switch (OutMI.getOpcode()) {
+     default: llvm_unreachable("Invalid opcode");
+     case X86::VPCMPBZ128rmi: NewOpc = X86::VPCMPGTBZ128rm; break;
+     case X86::VPCMPBZ128rmik: NewOpc = X86::VPCMPGTBZ128rmk; break;
+     case X86::VPCMPBZ128rri: NewOpc = X86::VPCMPGTBZ128rr; break;
+     case X86::VPCMPBZ128rrik: NewOpc = X86::VPCMPGTBZ128rrk; break;
+     case X86::VPCMPBZ256rmi: NewOpc = X86::VPCMPGTBZ256rm; break;
+     case X86::VPCMPBZ256rmik: NewOpc = X86::VPCMPGTBZ256rmk; break;
+     case X86::VPCMPBZ256rri: NewOpc = X86::VPCMPGTBZ256rr; break;
+     case X86::VPCMPBZ256rrik: NewOpc = X86::VPCMPGTBZ256rrk; break;
+     case X86::VPCMPBZrmi: NewOpc = X86::VPCMPGTBZrm; break;
+     case X86::VPCMPBZrmik: NewOpc = X86::VPCMPGTBZrmk; break;
+     case X86::VPCMPBZrri: NewOpc = X86::VPCMPGTBZrr; break;
+     case X86::VPCMPBZrrik: NewOpc = X86::VPCMPGTBZrrk; break;
+     case X86::VPCMPDZ128rmi: NewOpc = X86::VPCMPGTDZ128rm; break;
+     case X86::VPCMPDZ128rmib: NewOpc = X86::VPCMPGTDZ128rmb; break;
+     case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPGTDZ128rmbk; break;
+     case X86::VPCMPDZ128rmik: NewOpc = X86::VPCMPGTDZ128rmk; break;
+     case X86::VPCMPDZ128rri: NewOpc = X86::VPCMPGTDZ128rr; break;
+     case X86::VPCMPDZ128rrik: NewOpc = X86::VPCMPGTDZ128rrk; break;
+     case X86::VPCMPDZ256rmi: NewOpc = X86::VPCMPGTDZ256rm; break;
+     case X86::VPCMPDZ256rmib: NewOpc = X86::VPCMPGTDZ256rmb; break;
+     case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPGTDZ256rmbk; break;
+     case X86::VPCMPDZ256rmik: NewOpc = X86::VPCMPGTDZ256rmk; break;
+     case X86::VPCMPDZ256rri: NewOpc = X86::VPCMPGTDZ256rr; break;
+     case X86::VPCMPDZ256rrik: NewOpc = X86::VPCMPGTDZ256rrk; break;
+     case X86::VPCMPDZrmi: NewOpc = X86::VPCMPGTDZrm; break;
+     case X86::VPCMPDZrmib: NewOpc = X86::VPCMPGTDZrmb; break;
+     case X86::VPCMPDZrmibk: NewOpc = X86::VPCMPGTDZrmbk; break;
+     case X86::VPCMPDZrmik: NewOpc = X86::VPCMPGTDZrmk; break;
+     case X86::VPCMPDZrri: NewOpc = X86::VPCMPGTDZrr; break;
+     case X86::VPCMPDZrrik: NewOpc = X86::VPCMPGTDZrrk; break;
+     case X86::VPCMPQZ128rmi: NewOpc = X86::VPCMPGTQZ128rm; break;
+     case X86::VPCMPQZ128rmib: NewOpc = X86::VPCMPGTQZ128rmb; break;
+     case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPGTQZ128rmbk; break;
+     case X86::VPCMPQZ128rmik: NewOpc = X86::VPCMPGTQZ128rmk; break;
+     case X86::VPCMPQZ128rri: NewOpc = X86::VPCMPGTQZ128rr; break;
+     case X86::VPCMPQZ128rrik: NewOpc = X86::VPCMPGTQZ128rrk; break;
+     case X86::VPCMPQZ256rmi: NewOpc = X86::VPCMPGTQZ256rm; break;
+     case X86::VPCMPQZ256rmib: NewOpc = X86::VPCMPGTQZ256rmb; break;
+     case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPGTQZ256rmbk; break;
+     case X86::VPCMPQZ256rmik: NewOpc = X86::VPCMPGTQZ256rmk; break;
+     case X86::VPCMPQZ256rri: NewOpc = X86::VPCMPGTQZ256rr; break;
+     case X86::VPCMPQZ256rrik: NewOpc = X86::VPCMPGTQZ256rrk; break;
+     case X86::VPCMPQZrmi: NewOpc = X86::VPCMPGTQZrm; break;
+     case X86::VPCMPQZrmib: NewOpc = X86::VPCMPGTQZrmb; break;
+     case X86::VPCMPQZrmibk: NewOpc = X86::VPCMPGTQZrmbk; break;
+     case X86::VPCMPQZrmik: NewOpc = X86::VPCMPGTQZrmk; break;
+     case X86::VPCMPQZrri: NewOpc = X86::VPCMPGTQZrr; break;
+     case X86::VPCMPQZrrik: NewOpc = X86::VPCMPGTQZrrk; break;
+     case X86::VPCMPWZ128rmi: NewOpc = X86::VPCMPGTWZ128rm; break;
+     case X86::VPCMPWZ128rmik: NewOpc = X86::VPCMPGTWZ128rmk; break;
+     case X86::VPCMPWZ128rri: NewOpc = X86::VPCMPGTWZ128rr; break;
+     case X86::VPCMPWZ128rrik: NewOpc = X86::VPCMPGTWZ128rrk; break;
+     case X86::VPCMPWZ256rmi: NewOpc = X86::VPCMPGTWZ256rm; break;
+     case X86::VPCMPWZ256rmik: NewOpc = X86::VPCMPGTWZ256rmk; break;
+     case X86::VPCMPWZ256rri: NewOpc = X86::VPCMPGTWZ256rr; break;
+     case X86::VPCMPWZ256rrik: NewOpc = X86::VPCMPGTWZ256rrk; break;
+     case X86::VPCMPWZrmi: NewOpc = X86::VPCMPGTWZrm; break;
+     case X86::VPCMPWZrmik: NewOpc = X86::VPCMPGTWZrmk; break;
+     case X86::VPCMPWZrri: NewOpc = X86::VPCMPGTWZrr; break;
+     case X86::VPCMPWZrrik: NewOpc = X86::VPCMPGTWZrrk; break;
+     }
+
+     // Switch to the GT opcode and drop the now-implicit predicate immediate.
+     OutMI.setOpcode(NewOpc);
+     OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
+     break;
+   }
+
+   break;
+ }
+
+
// CALL64r, CALL64pcrel32 - These instructions used to have
// register inputs modeled as normal uses instead of implicit uses. As such,
// we used to truncate off all but the first operand (the callee). This
; KNL-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
; KNL-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
-; KNL-NEXT: vpcmpleq %zmm1, %zmm0, %k1
-; KNL-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 {%k1}
+; KNL-NEXT: vpcmpgtq %zmm3, %zmm2, %k1
+; KNL-NEXT: vpcmpleq %zmm1, %zmm0, %k1 {%k1}
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT: vzeroupper
;
; SKX-LABEL: test4:
; SKX: ## %bb.0:
-; SKX-NEXT: vpcmpleq %ymm1, %ymm0, %k1
-; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
+; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k1
+; SKX-NEXT: vpcmpleq %ymm1, %ymm0, %k0 {%k1}
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; AVX512BW-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512BW-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512BW-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpcmpleq %zmm1, %zmm0, %k1
-; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 {%k1}
+; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm2, %k1
+; AVX512BW-NEXT: vpcmpleq %zmm1, %zmm0, %k1 {%k1}
; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512DQ-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512DQ-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512DQ-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512DQ-NEXT: vpcmpleq %zmm1, %zmm0, %k1
-; AVX512DQ-NEXT: vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
+; AVX512DQ-NEXT: vpcmpgtq %zmm3, %zmm2, %k1
+; AVX512DQ-NEXT: vpcmpleq %zmm1, %zmm0, %k0 {%k1}
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
;
; X86-LABEL: test4:
; X86: ## %bb.0:
-; X86-NEXT: vpcmpleq %ymm1, %ymm0, %k1
-; X86-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
+; X86-NEXT: vpcmpgtq %ymm3, %ymm2, %k1
+; X86-NEXT: vpcmpleq %ymm1, %ymm0, %k0 {%k1}
; X86-NEXT: vpmovm2d %k0, %xmm0
; X86-NEXT: vzeroupper
; X86-NEXT: retl
; KNL-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2
; KNL-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
-; KNL-NEXT: vpcmpleq %zmm3, %zmm2, %k1
-; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 {%k1}
+; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k1
+; KNL-NEXT: vpcmpleq %zmm3, %zmm2, %k1 {%k1}
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT: vzeroupper
;
; SKX-LABEL: test5:
; SKX: ## %bb.0:
-; SKX-NEXT: vpcmpleq %xmm3, %xmm2, %k1
-; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1}
+; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k1
+; SKX-NEXT: vpcmpleq %xmm3, %xmm2, %k0 {%k1}
; SKX-NEXT: vpmovm2q %k0, %xmm0
; SKX-NEXT: retq
;
; AVX512BW-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512BW-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vpcmpleq %zmm3, %zmm2, %k1
-; AVX512BW-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 {%k1}
+; AVX512BW-NEXT: vpcmpgtq %zmm0, %zmm1, %k1
+; AVX512BW-NEXT: vpcmpleq %zmm3, %zmm2, %k1 {%k1}
; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512DQ-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512DQ-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512DQ-NEXT: vpcmpleq %zmm3, %zmm2, %k1
-; AVX512DQ-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 {%k1}
+; AVX512DQ-NEXT: vpcmpgtq %zmm0, %zmm1, %k1
+; AVX512DQ-NEXT: vpcmpleq %zmm3, %zmm2, %k0 {%k1}
; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
;
; X86-LABEL: test5:
; X86: ## %bb.0:
-; X86-NEXT: vpcmpleq %xmm3, %xmm2, %k1
-; X86-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1}
+; X86-NEXT: vpcmpgtq %xmm0, %xmm1, %k1
+; X86-NEXT: vpcmpleq %xmm3, %xmm2, %k0 {%k1}
; X86-NEXT: vpmovm2q %k0, %xmm0
; X86-NEXT: retl
%x_gt_y = icmp slt <2 x i64> %x, %y