setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
return SDValue();
// We essentially have two forms here. Shift by an immediate and shift by a
- // vector register. We cannot easily match shift by an immediate in tablegen
- // so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM. For shifting
- // by a vector, we don't have VSHR, only VSHL (which can be signed or
- // unsigned, and a negative shift indicates a shift right).
+ // vector register (there is also a shift by a GPR, but that is just handled
+ // with a tablegen pattern). We cannot easily match shift by an immediate in
+ // tablegen so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM.
+ // For shifting by a vector, we don't have VSHR, only VSHL (which can be
+ // signed or unsigned, and a negative shift indicates a shift right).
if (N->getOpcode() == ISD::SHL) {
if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
if (!VT.isVector() || !TLI.isTypeLegal(VT))
return SDValue();
- assert(ST->hasNEON() && "unexpected vector shift");
int64_t Cnt;
switch (N->getOpcode()) {
def ARMvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
def ARMvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;
+
+// Vector shift by immediate: result and first operand share one vector type,
+// and the shift amount is an i32 immediate operand.
+def SDTARMVSHIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
+// Vector shift by vector: all three operands share the same vector type.
+def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>]>;
+// Immediate shift nodes; right shifts keep separate signed/unsigned forms.
+def ARMvshlImm : SDNode<"ARMISD::VSHLIMM", SDTARMVSHIMM>;
+def ARMvshrsImm : SDNode<"ARMISD::VSHRsIMM", SDTARMVSHIMM>;
+def ARMvshruImm : SDNode<"ARMISD::VSHRuIMM", SDTARMVSHIMM>;
+// Shift left by a vector of per-lane amounts; a negative amount indicates a
+// shift right (see PerformShiftCombine in ARMISelLowering).
+def ARMvshls : SDNode<"ARMISD::VSHLs", SDTARMVSH>;
+def ARMvshlu : SDNode<"ARMISD::VSHLu", SDTARMVSH>;
+
def ARMWLS : SDNode<"ARMISD::WLS", SDT_ARMWhileLoop,
[SDNPHasChain]>;
defm MVE_VQRSHL_by_vec : mve_shift_by_vec_multi<"vqrshl", 0b1, 0b1>;
defm MVE_VRSHL_by_vec : mve_shift_by_vec_multi<"vrshl", 0b0, 0b1>;
+// Select the generic shift-by-vector nodes (ARMvshlu/ARMvshls; a negative
+// per-lane amount indicates a shift right) onto the MVE VSHL-by-vector
+// instructions for each 128-bit integer element size.
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v4i32 (ARMvshlu (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn))),
+ (v4i32 (MVE_VSHL_by_vecu32 (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)))>;
+ def : Pat<(v8i16 (ARMvshlu (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn))),
+ (v8i16 (MVE_VSHL_by_vecu16 (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)))>;
+ def : Pat<(v16i8 (ARMvshlu (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn))),
+ (v16i8 (MVE_VSHL_by_vecu8 (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)))>;
+
+ def : Pat<(v4i32 (ARMvshls (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn))),
+ (v4i32 (MVE_VSHL_by_vecs32 (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)))>;
+ def : Pat<(v8i16 (ARMvshls (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn))),
+ (v8i16 (MVE_VSHL_by_vecs16 (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)))>;
+ def : Pat<(v16i8 (ARMvshls (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn))),
+ (v16i8 (MVE_VSHL_by_vecs8 (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)))>;
+}
+
class MVE_shift_with_imm<string iname, string suffix, dag oops, dag iops,
string ops, vpred_ops vpred, string cstr,
list<dag> pattern=[]>
let Inst{21} = 0b1;
}
+// Immediate-shift selection: ARMvshlImm/ARMvshrsImm/ARMvshruImm are created
+// in ARMISelLowering (shift-by-immediate is not matched directly in
+// tablegen) and map 1-1 onto the MVE immediate shift instructions. The
+// accepted immediate range tracks the element width (imm0_31 for i32,
+// imm0_15 for i16, imm0_7 for i8).
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v4i32 (ARMvshlImm (v4i32 MQPR:$src), imm0_31:$imm)),
+ (v4i32 (MVE_VSHL_immi32 (v4i32 MQPR:$src), imm0_31:$imm))>;
+ def : Pat<(v8i16 (ARMvshlImm (v8i16 MQPR:$src), imm0_15:$imm)),
+ (v8i16 (MVE_VSHL_immi16 (v8i16 MQPR:$src), imm0_15:$imm))>;
+ def : Pat<(v16i8 (ARMvshlImm (v16i8 MQPR:$src), imm0_7:$imm)),
+ (v16i8 (MVE_VSHL_immi8 (v16i8 MQPR:$src), imm0_7:$imm))>;
+
+ def : Pat<(v4i32 (ARMvshruImm (v4i32 MQPR:$src), imm0_31:$imm)),
+ (v4i32 (MVE_VSHR_immu32 (v4i32 MQPR:$src), imm0_31:$imm))>;
+ def : Pat<(v8i16 (ARMvshruImm (v8i16 MQPR:$src), imm0_15:$imm)),
+ (v8i16 (MVE_VSHR_immu16 (v8i16 MQPR:$src), imm0_15:$imm))>;
+ def : Pat<(v16i8 (ARMvshruImm (v16i8 MQPR:$src), imm0_7:$imm)),
+ (v16i8 (MVE_VSHR_immu8 (v16i8 MQPR:$src), imm0_7:$imm))>;
+
+ def : Pat<(v4i32 (ARMvshrsImm (v4i32 MQPR:$src), imm0_31:$imm)),
+ (v4i32 (MVE_VSHR_imms32 (v4i32 MQPR:$src), imm0_31:$imm))>;
+ def : Pat<(v8i16 (ARMvshrsImm (v8i16 MQPR:$src), imm0_15:$imm)),
+ (v8i16 (MVE_VSHR_imms16 (v8i16 MQPR:$src), imm0_15:$imm))>;
+ def : Pat<(v16i8 (ARMvshrsImm (v16i8 MQPR:$src), imm0_7:$imm)),
+ (v16i8 (MVE_VSHR_imms8 (v16i8 MQPR:$src), imm0_7:$imm))>;
+}
+
// end of mve_shift instructions
// start of MVE Floating Point instructions
defm MVE_VQSHL_qr : MVE_VxSHL_qr_types<"vqshl", 0b1, 0b0>;
defm MVE_VQRSHL_qr : MVE_VxSHL_qr_types<"vqrshl", 0b1, 0b1>;
+// Shift of a whole vector by a scalar: when the shift amount is a vdup of a
+// GPR, select the VSHL q, r register form rather than materialising the
+// duplicated vector operand.
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v4i32 (ARMvshlu (v4i32 MQPR:$Qm), (v4i32 (ARMvdup GPR:$Rm)))),
+ (v4i32 (MVE_VSHL_qru32 (v4i32 MQPR:$Qm), GPR:$Rm))>;
+ def : Pat<(v8i16 (ARMvshlu (v8i16 MQPR:$Qm), (v8i16 (ARMvdup GPR:$Rm)))),
+ (v8i16 (MVE_VSHL_qru16 (v8i16 MQPR:$Qm), GPR:$Rm))>;
+ def : Pat<(v16i8 (ARMvshlu (v16i8 MQPR:$Qm), (v16i8 (ARMvdup GPR:$Rm)))),
+ (v16i8 (MVE_VSHL_qru8 (v16i8 MQPR:$Qm), GPR:$Rm))>;
+
+ def : Pat<(v4i32 (ARMvshls (v4i32 MQPR:$Qm), (v4i32 (ARMvdup GPR:$Rm)))),
+ (v4i32 (MVE_VSHL_qrs32 (v4i32 MQPR:$Qm), GPR:$Rm))>;
+ def : Pat<(v8i16 (ARMvshls (v8i16 MQPR:$Qm), (v8i16 (ARMvdup GPR:$Rm)))),
+ (v8i16 (MVE_VSHL_qrs16 (v8i16 MQPR:$Qm), GPR:$Rm))>;
+ def : Pat<(v16i8 (ARMvshls (v16i8 MQPR:$Qm), (v16i8 (ARMvdup GPR:$Rm)))),
+ (v16i8 (MVE_VSHL_qrs8 (v16i8 MQPR:$Qm), GPR:$Rm))>;
+}
+
class MVE_VBRSR<string iname, string suffix, bits<2> size, list<dag> pattern=[]>
: MVE_qDest_rSrc<iname, suffix, pattern> {
def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>;
-// Vector Shifts
-def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
- SDTCisSameAs<0, 2>,]>;
-
-def NEONvshls : SDNode<"ARMISD::VSHLs", SDTARMVSH>;
-def NEONvshlu : SDNode<"ARMISD::VSHLu", SDTARMVSH>;
-
+// NOTE: The basic shift profiles (SDTARMVSH, SDTARMVSHIMM) and the plain
+// shift nodes now live in the shared ARM .td file as ARMvshls/ARMvshlu and
+// ARMvshlImm/ARMvshrsImm/ARMvshruImm so both NEON and MVE can use them;
+// only the long/narrow and shift-insert variants remain NEON-specific here.
// Types for vector shift by immediates. The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types. The "SHINS" version is for shift and insert operations.
-def SDTARMVSHIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
- SDTCisVT<2, i32>]>;
def SDTARMVSHXIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
SDTCisVT<2, i32>]>;
def SDTARMVSHINSIMM : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
-def NEONvshlImm : SDNode<"ARMISD::VSHLIMM", SDTARMVSHIMM>;
-def NEONvshrsImm : SDNode<"ARMISD::VSHRsIMM", SDTARMVSHIMM>;
-def NEONvshruImm : SDNode<"ARMISD::VSHRuIMM", SDTARMVSHIMM>;
def NEONvshrnImm : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>;
def NEONvrshrsImm : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>;
int_arm_neon_vraddhn, 1>;
let Predicates = [HasNEON] in {
// Match "add, unsigned shift right by half the wide lane width, truncate"
// (i.e. keep the high half of each sum) onto the narrowing VADDHN.
-def : Pat<(v8i8 (trunc (NEONvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
+def : Pat<(v8i8 (trunc (ARMvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
(VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v4i16 (trunc (NEONvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
+def : Pat<(v4i16 (trunc (ARMvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
(VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v2i32 (trunc (NEONvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
+def : Pat<(v2i32 (trunc (ARMvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
(VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
}
int_arm_neon_vrsubhn, 0>;
let Predicates = [HasNEON] in {
// As for VADDHN above: the high half of each difference selects VSUBHN.
-def : Pat<(v8i8 (trunc (NEONvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
+def : Pat<(v8i8 (trunc (ARMvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
(VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v4i16 (trunc (NEONvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
+def : Pat<(v4i16 (trunc (ARMvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
(VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v2i32 (trunc (NEONvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
+def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
(VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
}
// Signed shift right of the difference of two zero-extended operands;
// presumably a building block for the absolute-difference (VABD-style)
// patterns -- confirm against its uses.
def abd_shr :
PatFrag<(ops node:$in1, node:$in2, node:$shift),
- (NEONvshrsImm (sub (zext node:$in1),
+ (ARMvshrsImm (sub (zext node:$in1),
(zext node:$in2)), (i32 $shift))>;
let Predicates = [HasNEON] in {
"vshl", "u", int_arm_neon_vshiftu>;
let Predicates = [HasNEON] in {
// Map the generic shift-by-vector nodes (signed ARMvshls / unsigned
// ARMvshlu; a negative per-lane amount shifts right) onto the NEON
// VSHL register instructions for every D- and Q-register integer type.
-def : Pat<(v8i8 (NEONvshls (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
+def : Pat<(v8i8 (ARMvshls (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
(VSHLsv8i8 DPR:$Dn, DPR:$Dm)>;
-def : Pat<(v4i16 (NEONvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
+def : Pat<(v4i16 (ARMvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
(VSHLsv4i16 DPR:$Dn, DPR:$Dm)>;
-def : Pat<(v2i32 (NEONvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
+def : Pat<(v2i32 (ARMvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
(VSHLsv2i32 DPR:$Dn, DPR:$Dm)>;
-def : Pat<(v1i64 (NEONvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
+def : Pat<(v1i64 (ARMvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
(VSHLsv1i64 DPR:$Dn, DPR:$Dm)>;
-def : Pat<(v16i8 (NEONvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
+def : Pat<(v16i8 (ARMvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
(VSHLsv16i8 QPR:$Dn, QPR:$Dm)>;
-def : Pat<(v8i16 (NEONvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
+def : Pat<(v8i16 (ARMvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
(VSHLsv8i16 QPR:$Dn, QPR:$Dm)>;
-def : Pat<(v4i32 (NEONvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
+def : Pat<(v4i32 (ARMvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
(VSHLsv4i32 QPR:$Dn, QPR:$Dm)>;
-def : Pat<(v2i64 (NEONvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
+def : Pat<(v2i64 (ARMvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
(VSHLsv2i64 QPR:$Dn, QPR:$Dm)>;
-def : Pat<(v8i8 (NEONvshlu (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
+def : Pat<(v8i8 (ARMvshlu (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
(VSHLuv8i8 DPR:$Dn, DPR:$Dm)>;
-def : Pat<(v4i16 (NEONvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
+def : Pat<(v4i16 (ARMvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
(VSHLuv4i16 DPR:$Dn, DPR:$Dm)>;
-def : Pat<(v2i32 (NEONvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
+def : Pat<(v2i32 (ARMvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
(VSHLuv2i32 DPR:$Dn, DPR:$Dm)>;
-def : Pat<(v1i64 (NEONvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
+def : Pat<(v1i64 (ARMvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
(VSHLuv1i64 DPR:$Dn, DPR:$Dm)>;
-def : Pat<(v16i8 (NEONvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
+def : Pat<(v16i8 (ARMvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
(VSHLuv16i8 QPR:$Dn, QPR:$Dm)>;
-def : Pat<(v8i16 (NEONvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
+def : Pat<(v8i16 (ARMvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
(VSHLuv8i16 QPR:$Dn, QPR:$Dm)>;
-def : Pat<(v4i32 (NEONvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
+def : Pat<(v4i32 (ARMvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
(VSHLuv4i32 QPR:$Dn, QPR:$Dm)>;
-def : Pat<(v2i64 (NEONvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
+def : Pat<(v2i64 (ARMvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
(VSHLuv2i64 QPR:$Dn, QPR:$Dm)>;
}
// VSHL : Vector Shift Left (Immediate)
// (These defms now reference the shared ARMvshlImm/ARMvshrsImm/ARMvshruImm
// nodes instead of the old NEON-local names.)
-defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshlImm>;
+defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", ARMvshlImm>;
// VSHR : Vector Shift Right (Immediate)
defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
- NEONvshrsImm>;
+ ARMvshrsImm>;
defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
- NEONvshruImm>;
+ ARMvshruImm>;
// VSHLL : Vector Shift Left Long
// (The long forms sign/zero-extend the source, then shift by the immediate.)
defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
- PatFrag<(ops node:$LHS, node:$RHS), (NEONvshlImm (sext node:$LHS), node:$RHS)>>;
+ PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (sext node:$LHS), node:$RHS)>>;
defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
- PatFrag<(ops node:$LHS, node:$RHS), (NEONvshlImm (zext node:$LHS), node:$RHS)>>;
+ PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (zext node:$LHS), node:$RHS)>>;
// VSHLL : Vector Shift Left Long (with maximum shift count)
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
v2i64, v2i32, imm32>;
let Predicates = [HasNEON] in {
// VSHLL with the maximum shift count: shifting an extended value left by
// exactly the source element width (8/16/32) selects the special
// maximum-shift VSHLL encoding.
-def : Pat<(v8i16 (NEONvshlImm (zext (v8i8 DPR:$Rn)), (i32 8))),
+def : Pat<(v8i16 (ARMvshlImm (zext (v8i8 DPR:$Rn)), (i32 8))),
(VSHLLi8 DPR:$Rn, 8)>;
-def : Pat<(v4i32 (NEONvshlImm (zext (v4i16 DPR:$Rn)), (i32 16))),
+def : Pat<(v4i32 (ARMvshlImm (zext (v4i16 DPR:$Rn)), (i32 16))),
(VSHLLi16 DPR:$Rn, 16)>;
-def : Pat<(v2i64 (NEONvshlImm (zext (v2i32 DPR:$Rn)), (i32 32))),
+def : Pat<(v2i64 (ARMvshlImm (zext (v2i32 DPR:$Rn)), (i32 32))),
(VSHLLi32 DPR:$Rn, 32)>;
-def : Pat<(v8i16 (NEONvshlImm (sext (v8i8 DPR:$Rn)), (i32 8))),
+def : Pat<(v8i16 (ARMvshlImm (sext (v8i8 DPR:$Rn)), (i32 8))),
(VSHLLi8 DPR:$Rn, 8)>;
-def : Pat<(v4i32 (NEONvshlImm (sext (v4i16 DPR:$Rn)), (i32 16))),
+def : Pat<(v4i32 (ARMvshlImm (sext (v4i16 DPR:$Rn)), (i32 16))),
(VSHLLi16 DPR:$Rn, 16)>;
-def : Pat<(v2i64 (NEONvshlImm (sext (v2i32 DPR:$Rn)), (i32 32))),
+def : Pat<(v2i64 (ARMvshlImm (sext (v2i32 DPR:$Rn)), (i32 32))),
(VSHLLi32 DPR:$Rn, 32)>;
-def : Pat<(v8i16 (NEONvshlImm (anyext (v8i8 DPR:$Rn)), (i32 8))),
+def : Pat<(v8i16 (ARMvshlImm (anyext (v8i8 DPR:$Rn)), (i32 8))),
(VSHLLi8 DPR:$Rn, 8)>;
-def : Pat<(v4i32 (NEONvshlImm (anyext (v4i16 DPR:$Rn)), (i32 16))),
+def : Pat<(v4i32 (ARMvshlImm (anyext (v4i16 DPR:$Rn)), (i32 16))),
(VSHLLi16 DPR:$Rn, 16)>;
-def : Pat<(v2i64 (NEONvshlImm (anyext (v2i32 DPR:$Rn)), (i32 32))),
+def : Pat<(v2i64 (ARMvshlImm (anyext (v2i32 DPR:$Rn)), (i32 32))),
(VSHLLi32 DPR:$Rn, 32)>;
}
// VSHRN : Vector Shift Right and Narrow
defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
PatFrag<(ops node:$Rn, node:$amt),
- (trunc (NEONvshrsImm node:$Rn, node:$amt))>>;
+ (trunc (ARMvshrsImm node:$Rn, node:$amt))>>;
// Also match the unsigned shift-right-then-truncate forms onto VSHRN.
let Predicates = [HasNEON] in {
-def : Pat<(v8i8 (trunc (NEONvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))),
+def : Pat<(v8i8 (trunc (ARMvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))),
(VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
-def : Pat<(v4i16 (trunc (NEONvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))),
+def : Pat<(v4i16 (trunc (ARMvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))),
(VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
-def : Pat<(v2i32 (trunc (NEONvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))),
+def : Pat<(v2i32 (trunc (ARMvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))),
(VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
}
NEONvqrshrnsuImm>;
// VSRA : Vector Shift Right and Accumulate
-defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrsImm>;
-defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshruImm>;
+defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", ARMvshrsImm>;
+defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", ARMvshruImm>;
// VRSRA : Vector Rounding Shift Right and Accumulate
// (The rounding variants keep their NEON-local nodes; only the basic shift
// nodes were renamed to the shared ARM prefix.)
defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrsImm>;
defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshruImm>;
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve %s -o - | FileCheck %s
+
+; Shift of one vector by another (per-lane amounts): shl selects vshl.u<n>;
+; MVE has no vector-shift-right by vector, so lshr/ashr negate the amounts
+; and use vshl.u / vshl.s respectively.
+define arm_aapcs_vfpcc <16 x i8> @shl_qq_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
+; CHECK-LABEL: shl_qq_int8_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshl.u8 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = shl <16 x i8> %src1, %src2
+ ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @shl_qq_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: shl_qq_int16_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshl.u16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = shl <8 x i16> %src1, %src2
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @shl_qq_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
+; CHECK-LABEL: shl_qq_int32_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshl.u32 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = shl <4 x i32> %src1, %src2
+ ret <4 x i32> %0
+}
+
+
+define arm_aapcs_vfpcc <16 x i8> @shru_qq_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
+; CHECK-LABEL: shru_qq_int8_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vneg.s8 q1, q1
+; CHECK-NEXT: vshl.u8 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = lshr <16 x i8> %src1, %src2
+ ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @shru_qq_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: shru_qq_int16_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vneg.s16 q1, q1
+; CHECK-NEXT: vshl.u16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = lshr <8 x i16> %src1, %src2
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @shru_qq_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
+; CHECK-LABEL: shru_qq_int32_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vneg.s32 q1, q1
+; CHECK-NEXT: vshl.u32 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = lshr <4 x i32> %src1, %src2
+ ret <4 x i32> %0
+}
+
+
+define arm_aapcs_vfpcc <16 x i8> @shrs_qq_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
+; CHECK-LABEL: shrs_qq_int8_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vneg.s8 q1, q1
+; CHECK-NEXT: vshl.s8 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = ashr <16 x i8> %src1, %src2
+ ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @shrs_qq_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: shrs_qq_int16_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vneg.s16 q1, q1
+; CHECK-NEXT: vshl.s16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = ashr <8 x i16> %src1, %src2
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @shrs_qq_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
+; CHECK-LABEL: shrs_qq_int32_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vneg.s32 q1, q1
+; CHECK-NEXT: vshl.s32 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = ashr <4 x i32> %src1, %src2
+ ret <4 x i32> %0
+}
+
+
+; Shifts by a splat immediate amount: selected as vshl.i / vshr.u / vshr.s
+; with the amount encoded directly in the instruction.
+define arm_aapcs_vfpcc <16 x i8> @shl_qi_int8_t(<16 x i8> %src1) {
+; CHECK-LABEL: shl_qi_int8_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshl.i8 q0, q0, #4
+; CHECK-NEXT: bx lr
+entry:
+ %0 = shl <16 x i8> %src1, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
+ ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @shl_qi_int16_t(<8 x i16> %src1) {
+; CHECK-LABEL: shl_qi_int16_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshl.i16 q0, q0, #4
+; CHECK-NEXT: bx lr
+entry:
+ %0 = shl <8 x i16> %src1, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @shl_qi_int32_t(<4 x i32> %src1) {
+; CHECK-LABEL: shl_qi_int32_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshl.i32 q0, q0, #4
+; CHECK-NEXT: bx lr
+entry:
+ %0 = shl <4 x i32> %src1, <i32 4, i32 4, i32 4, i32 4>
+ ret <4 x i32> %0
+}
+
+
+define arm_aapcs_vfpcc <16 x i8> @shru_qi_int8_t(<16 x i8> %src1) {
+; CHECK-LABEL: shru_qi_int8_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshr.u8 q0, q0, #4
+; CHECK-NEXT: bx lr
+entry:
+ %0 = lshr <16 x i8> %src1, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
+ ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @shru_qi_int16_t(<8 x i16> %src1) {
+; CHECK-LABEL: shru_qi_int16_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshr.u16 q0, q0, #4
+; CHECK-NEXT: bx lr
+entry:
+ %0 = lshr <8 x i16> %src1, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @shru_qi_int32_t(<4 x i32> %src1) {
+; CHECK-LABEL: shru_qi_int32_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshr.u32 q0, q0, #4
+; CHECK-NEXT: bx lr
+entry:
+ %0 = lshr <4 x i32> %src1, <i32 4, i32 4, i32 4, i32 4>
+ ret <4 x i32> %0
+}
+
+
+define arm_aapcs_vfpcc <16 x i8> @shrs_qi_int8_t(<16 x i8> %src1) {
+; CHECK-LABEL: shrs_qi_int8_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshr.s8 q0, q0, #4
+; CHECK-NEXT: bx lr
+entry:
+ %0 = ashr <16 x i8> %src1, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
+ ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @shrs_qi_int16_t(<8 x i16> %src1) {
+; CHECK-LABEL: shrs_qi_int16_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshr.s16 q0, q0, #4
+; CHECK-NEXT: bx lr
+entry:
+ %0 = ashr <8 x i16> %src1, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @shrs_qi_int32_t(<4 x i32> %src1) {
+; CHECK-LABEL: shrs_qi_int32_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshr.s32 q0, q0, #4
+; CHECK-NEXT: bx lr
+entry:
+ %0 = ashr <4 x i32> %src1, <i32 4, i32 4, i32 4, i32 4>
+ ret <4 x i32> %0
+}
+
+
+
+; Shifts by a scalar register: shl uses the vshl q, r form directly; right
+; shifts dup the register into a q register, negate it, and shift by vector.
+define arm_aapcs_vfpcc <16 x i8> @shl_qr_int8_t(<16 x i8> %src1, i8 %src2) {
+; CHECK-LABEL: shl_qr_int8_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshl.u8 q0, r0
+; CHECK-NEXT: bx lr
+entry:
+ %i = insertelement <16 x i8> undef, i8 %src2, i32 0
+ %s = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
+ %0 = shl <16 x i8> %src1, %s
+ ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @shl_qr_int16_t(<8 x i16> %src1, i16 %src2) {
+; CHECK-LABEL: shl_qr_int16_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshl.u16 q0, r0
+; CHECK-NEXT: bx lr
+entry:
+ %i = insertelement <8 x i16> undef, i16 %src2, i32 0
+ %s = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
+ %0 = shl <8 x i16> %src1, %s
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @shl_qr_int32_t(<4 x i32> %src1, i32 %src2) {
+; CHECK-LABEL: shl_qr_int32_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshl.u32 q0, r0
+; CHECK-NEXT: bx lr
+entry:
+ %i = insertelement <4 x i32> undef, i32 %src2, i32 0
+ %s = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %0 = shl <4 x i32> %src1, %s
+ ret <4 x i32> %0
+}
+
+
+define arm_aapcs_vfpcc <16 x i8> @shru_qr_int8_t(<16 x i8> %src1, i8 %src2) {
+; CHECK-LABEL: shru_qr_int8_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vdup.8 q1, r0
+; CHECK-NEXT: vneg.s8 q1, q1
+; CHECK-NEXT: vshl.u8 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %i = insertelement <16 x i8> undef, i8 %src2, i32 0
+ %s = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
+ %0 = lshr <16 x i8> %src1, %s
+ ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @shru_qr_int16_t(<8 x i16> %src1, i16 %src2) {
+; CHECK-LABEL: shru_qr_int16_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vdup.16 q1, r0
+; CHECK-NEXT: vneg.s16 q1, q1
+; CHECK-NEXT: vshl.u16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %i = insertelement <8 x i16> undef, i16 %src2, i32 0
+ %s = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
+ %0 = lshr <8 x i16> %src1, %s
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @shru_qr_int32_t(<4 x i32> %src1, i32 %src2) {
+; CHECK-LABEL: shru_qr_int32_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vdup.32 q1, r0
+; CHECK-NEXT: vneg.s32 q1, q1
+; CHECK-NEXT: vshl.u32 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %i = insertelement <4 x i32> undef, i32 %src2, i32 0
+ %s = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %0 = lshr <4 x i32> %src1, %s
+ ret <4 x i32> %0
+}
+
+
+define arm_aapcs_vfpcc <16 x i8> @shrs_qr_int8_t(<16 x i8> %src1, i8 %src2) {
+; CHECK-LABEL: shrs_qr_int8_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vdup.8 q1, r0
+; CHECK-NEXT: vneg.s8 q1, q1
+; CHECK-NEXT: vshl.s8 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %i = insertelement <16 x i8> undef, i8 %src2, i32 0
+ %s = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
+ %0 = ashr <16 x i8> %src1, %s
+ ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @shrs_qr_int16_t(<8 x i16> %src1, i16 %src2) {
+; CHECK-LABEL: shrs_qr_int16_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vdup.16 q1, r0
+; CHECK-NEXT: vneg.s16 q1, q1
+; CHECK-NEXT: vshl.s16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %i = insertelement <8 x i16> undef, i16 %src2, i32 0
+ %s = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
+ %0 = ashr <8 x i16> %src1, %s
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @shrs_qr_int32_t(<4 x i32> %src1, i32 %src2) {
+; CHECK-LABEL: shrs_qr_int32_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vdup.32 q1, r0
+; CHECK-NEXT: vneg.s32 q1, q1
+; CHECK-NEXT: vshl.s32 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %i = insertelement <4 x i32> undef, i32 %src2, i32 0
+ %s = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %0 = ashr <4 x i32> %src1, %s
+ ret <4 x i32> %0
+}
+
+
+; Shift left by a non-splat constant vector: the per-lane amounts are loaded
+; from a constant pool and the by-vector vshl.u form is used.
+define arm_aapcs_vfpcc <16 x i8> @shl_qiv_int8_t(<16 x i8> %src1) {
+; CHECK-LABEL: shl_qiv_int8_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: adr r0, .LCPI27_0
+; CHECK-NEXT: vldrw.u32 q1, [r0]
+; CHECK-NEXT: vshl.u8 q0, q0, q1
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI27_0:
+; CHECK-NEXT: .byte 1 @ 0x1
+; CHECK-NEXT: .byte 2 @ 0x2
+; CHECK-NEXT: .byte 3 @ 0x3
+; CHECK-NEXT: .byte 4 @ 0x4
+; CHECK-NEXT: .byte 1 @ 0x1
+; CHECK-NEXT: .byte 2 @ 0x2
+; CHECK-NEXT: .byte 3 @ 0x3
+; CHECK-NEXT: .byte 4 @ 0x4
+; CHECK-NEXT: .byte 1 @ 0x1
+; CHECK-NEXT: .byte 2 @ 0x2
+; CHECK-NEXT: .byte 3 @ 0x3
+; CHECK-NEXT: .byte 4 @ 0x4
+; CHECK-NEXT: .byte 1 @ 0x1
+; CHECK-NEXT: .byte 2 @ 0x2
+; CHECK-NEXT: .byte 3 @ 0x3
+; CHECK-NEXT: .byte 4 @ 0x4
+entry:
+ %0 = shl <16 x i8> %src1, <i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4>
+ ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @shl_qiv_int16_t(<8 x i16> %src1) {
+; CHECK-LABEL: shl_qiv_int16_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: adr r0, .LCPI28_0
+; CHECK-NEXT: vldrw.u32 q1, [r0]
+; CHECK-NEXT: vshl.u16 q0, q0, q1
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI28_0:
+; CHECK-NEXT: .short 1 @ 0x1
+; CHECK-NEXT: .short 2 @ 0x2
+; CHECK-NEXT: .short 3 @ 0x3
+; CHECK-NEXT: .short 4 @ 0x4
+; CHECK-NEXT: .short 1 @ 0x1
+; CHECK-NEXT: .short 2 @ 0x2
+; CHECK-NEXT: .short 3 @ 0x3
+; CHECK-NEXT: .short 4 @ 0x4
+entry:
+ %0 = shl <8 x i16> %src1, <i16 1, i16 2, i16 3, i16 4, i16 1, i16 2, i16 3, i16 4>
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @shl_qiv_int32_t(<4 x i32> %src1) {
+; CHECK-LABEL: shl_qiv_int32_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: adr r0, .LCPI29_0
+; CHECK-NEXT: vldrw.u32 q1, [r0]
+; CHECK-NEXT: vshl.u32 q0, q0, q1
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI29_0:
+; CHECK-NEXT: .long 1 @ 0x1
+; CHECK-NEXT: .long 2 @ 0x2
+; CHECK-NEXT: .long 3 @ 0x3
+; CHECK-NEXT: .long 4 @ 0x4
+entry:
+ %0 = shl <4 x i32> %src1, <i32 1, i32 2, i32 3, i32 4>
+ ret <4 x i32> %0
+}