def SDTFPTernaryOp : SDTypeProfile<1, 3, [ // fmadd, fnmsub, etc.
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisFP<0>
]>;
-def SDTIntUnaryOp : SDTypeProfile<1, 1, [ // ctlz, cttz
+def SDTIntUnaryOp : SDTypeProfile<1, 1, [ // abs, bitreverse, bswap
SDTCisSameAs<0, 1>, SDTCisInt<0>
]>;
+// Type profile for the bit-counting nodes. Both the result and the operand
+// must be integers, but unlike SDTIntUnaryOp there is no SDTCisSameAs
+// constraint, so the result type is not tied to the operand type.
+def SDTIntBitCountUnaryOp : SDTypeProfile<1, 1, [ // ctlz, cttz, ctpop
+ SDTCisInt<0>, SDTCisInt<1>
+]>;
def SDTIntExtendOp : SDTypeProfile<1, 1, [ // sext, zext, anyext
SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisSameNumEltsAs<0, 1>
]>;
def abs : SDNode<"ISD::ABS" , SDTIntUnaryOp>;
def bitreverse : SDNode<"ISD::BITREVERSE" , SDTIntUnaryOp>;
def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>;
-def ctlz : SDNode<"ISD::CTLZ" , SDTIntUnaryOp>;
-def cttz : SDNode<"ISD::CTTZ" , SDTIntUnaryOp>;
-def ctpop : SDNode<"ISD::CTPOP" , SDTIntUnaryOp>;
-def ctlz_zero_undef : SDNode<"ISD::CTLZ_ZERO_UNDEF", SDTIntUnaryOp>;
-def cttz_zero_undef : SDNode<"ISD::CTTZ_ZERO_UNDEF", SDTIntUnaryOp>;
+// Bit-count nodes use SDTIntBitCountUnaryOp, which does not force the result
+// type to match the operand type; patterns must state the result type.
+def ctlz : SDNode<"ISD::CTLZ" , SDTIntBitCountUnaryOp>;
+def cttz : SDNode<"ISD::CTTZ" , SDTIntBitCountUnaryOp>;
+def ctpop : SDNode<"ISD::CTPOP" , SDTIntBitCountUnaryOp>;
+def ctlz_zero_undef : SDNode<"ISD::CTLZ_ZERO_UNDEF", SDTIntBitCountUnaryOp>;
+def cttz_zero_undef : SDNode<"ISD::CTTZ_ZERO_UNDEF", SDTIntBitCountUnaryOp>;
def sext : SDNode<"ISD::SIGN_EXTEND", SDTIntExtendOp>;
def zext : SDNode<"ISD::ZERO_EXTEND", SDTIntExtendOp>;
def anyext : SDNode<"ISD::ANY_EXTEND" , SDTIntExtendOp>;
>;
}
def : GCNPat <
- (i16 (add (i16 (trunc (getDivergentFrag<ctpop>.ret i32:$popcnt))), i16:$val)),
+ (i16 (add (i16 (trunc (i32 (getDivergentFrag<ctpop>.ret i32:$popcnt)))), i16:$val)),
(V_BCNT_U32_B32_e64 $popcnt, $val)
>;
//
// Count leading zeros.
-def: Pat<(ctlz I32:$Rs), (S2_cl0 I32:$Rs)>;
+def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>;
def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>;
// Count trailing zeros.
-def: Pat<(cttz I32:$Rs), (S2_ct0 I32:$Rs)>;
+def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>;
def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>;
// Count leading ones.
-def: Pat<(ctlz (not I32:$Rs)), (S2_cl1 I32:$Rs)>;
+def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>;
def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>;
// Count trailing ones.
-def: Pat<(cttz (not I32:$Rs)), (S2_ct1 I32:$Rs)>;
+def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>;
def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>;
// Define leading/trailing patterns that require zero-extensions to 64 bits.
// The PTX clz instruction always returns a 32-bit value. For ctlz.i64, convert
// the PTX value to 64 bits to match the ISD node's semantics, unless we know
// we're truncating back down to 32 bits.
-def : Pat<(ctlz Int64Regs:$a), (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
+def : Pat<(i64 (ctlz Int64Regs:$a)), (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
def : Pat<(i32 (trunc (ctlz Int64Regs:$a))), (CLZr64 Int64Regs:$a)>;
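// For 16-bit ctlz, we zero-extend to 32-bit, perform the count, then truncate
// the result back to 16 bits when a 16-bit result is needed, and subtract 16 to
// discount the leading zeros introduced by the zero-extension. An alternative
// would be to combine the 16-bit input with a 16-bit immediate into a 32-bit
// register and then ctlz that value. This way we wouldn't have to subtract 16
// from the result. Unfortunately today we don't have a way to generate
// "mov b32reg, {b16imm, b16reg}", so we don't do this optimization.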
-def : Pat<(ctlz Int16Regs:$a),
+def : Pat<(i16 (ctlz Int16Regs:$a)),
(SUBi16ri (CVT_u16_u32
(CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE), 16)>;
-def : Pat<(i32 (zext (ctlz Int16Regs:$a))),
+def : Pat<(i32 (zext (i16 (ctlz Int16Regs:$a)))),
(SUBi32ri (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), 16)>;
// Population count
// If we know that we're storing into an i32, we can avoid the final trunc.
+// 16-bit ctpop: convert the input to 32 bits, count with POPCr32, then convert
+// the result back to 16 bits. The i16 result type is spelled out, matching the
+// sibling ctlz/ctpop patterns, because the ctpop type profile no longer ties
+// the result type to the operand type.
-def : Pat<(ctpop Int16Regs:$a),
+def : Pat<(i16 (ctpop Int16Regs:$a)),
(CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE)>;
-def : Pat<(i32 (zext (ctpop Int16Regs:$a))),
+def : Pat<(i32 (zext (i16 (ctpop Int16Regs:$a)))),
(POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE))>;
// fpround f32 -> f16
def : Pat<(SPcmpicc i64:$a, i64:$b), (CMPrr $a, $b)>;
def : Pat<(SPcmpicc i64:$a, (i64 simm13:$b)), (CMPri $a, (as_i32imm $b))>;
-def : Pat<(ctpop i64:$src), (POPCrr $src)>;
+def : Pat<(i64 (ctpop i64:$src)), (POPCrr $src)>;
} // Predicates = [Is64Bit]
defm LDQF : LoadA<"ldq", 0b100010, 0b110010, load, QFPRegs, f128>,
Requires<[HasV9, HasHardQuad]>;
-let DecoderMethod = "DecodeLoadCP" in
- defm LDC : Load<"ld", 0b110000, load, CoprocRegs, i32>;
-let DecoderMethod = "DecodeLoadCPPair" in
+let DecoderMethod = "DecodeLoadCP" in
+ defm LDC : Load<"ld", 0b110000, load, CoprocRegs, i32>;
+let DecoderMethod = "DecodeLoadCPPair" in
defm LDDC : Load<"ldd", 0b110011, load, CoprocPair, v2i32, IIC_ldd>;
let DecoderMethod = "DecodeLoadCP", Defs = [CPSR] in {
def POPCrr : F3_1<2, 0b101110,
(outs IntRegs:$rd), (ins IntRegs:$rs2),
"popc $rs2, $rd", []>, Requires<[HasV9]>;
-def : Pat<(ctpop i32:$src),
+def : Pat<(i32 (ctpop i32:$src)),
(POPCrr (SRLri $src, 0))>;
let Predicates = [HasV9], hasSideEffects = 1, rd = 0, rs1 = 0b01111 in
// cleared. We only use the first result here.
let Defs = [CC] in
def FLOGR : UnaryRRE<"flogr", 0xB983, null_frag, GR128, GR64>;
-def : Pat<(ctlz GR64:$src),
+def : Pat<(i64 (ctlz GR64:$src)),
(EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>;
// Population count. Counts bits set per byte or doubleword.
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1), OpcodeStr,
"$src1", "$src1",
- (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase,
+ (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
Sched<[sched]>;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src1), OpcodeStr,
"$src1", "$src1",
- (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
+ (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded]>;
}
(ins _.ScalarMemOp:$src1), OpcodeStr,
"${src1}"##_.BroadcastStr,
"${src1}"##_.BroadcastStr,
- (_.VT (OpNode (X86VBroadcast
- (_.ScalarLdFrag addr:$src1))))>,
+ (_.VT (OpNode (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src1)))))>,
EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded]>;
}
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
AVX512VLVectorVTInfo _, Predicate prd> {
let Predicates = [prd, NoVLX] in {
- def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
+ def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
(EXTRACT_SUBREG
(!cast<Instruction>(InstrStr # "Zrr")
(INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
_.info256.SubRegIdx)),
_.info256.SubRegIdx)>;
- def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
+ def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
(EXTRACT_SUBREG
(!cast<Instruction>(InstrStr # "Zrr")
(INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),