return 0;
}
-enum NeonScalarTypeMod {
- ScalarRet = (1 << 0),
- VectorRet = (1 << 1),
- ScalarArg0 = (1 << 2),
- VectorGetArg0 = (1 << 3),
- VectorCastArg0 = (1 << 4),
- ScalarArg1 = (1 << 5),
- VectorGetArg1 = (1 << 6),
- VectorCastArg1 = (1 << 7),
- ScalarFpCmpzArg1 = (1 << 8),
-
- VectorRetGetArgs01 = VectorRet | VectorGetArg0 | VectorGetArg1,
- FpCmpzModifiers = VectorRet | ScalarArg0 | ScalarFpCmpzArg1
+enum { // Modifier flags tested by LookupNeonIntrinsic when it builds an intrinsic's overload type list.
+ AddRetType = (1 << 0), // add the call's converted return type to the list
+ Add1ArgType = (1 << 1), // add ArgType once
+ Add2ArgTypes = (1 << 2), // add ArgType twice
+
+ VectorizeRetType = (1 << 3), // wrap the return type in a 1-element vector before adding it
+ VectorizeArgTypes = (1 << 4), // wrap ArgType in a 1-element vector before adding it
+
+ InventFloatType = (1 << 5), // append FloatTy after the return/argument entries
+
+ Vectorize1ArgType = Add1ArgType | VectorizeArgTypes, // one argument entry, vectorized
+ VectorRet = AddRetType | VectorizeRetType, // return type entry, vectorized
+ VectorRetGetArgs01 =
+ AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes, // vectorized return type plus two vectorized argument entries
+ FpCmpzModifiers =
+ AddRetType | VectorizeRetType | Add1ArgType | InventFloatType, // vectorized return type, one argument entry, then FloatTy
};
-struct NeonSISDIntrinsicInfo {
+ struct NeonSISDIntrinsicInfo {
unsigned BuiltinID;
unsigned LLVMIntrinsic;
const char *NameHint;
Intrinsic::LLVMIntrinsic, #NameBase, TypeModifier }
static const NeonSISDIntrinsicInfo AArch64SISDIntrinsicInfo[] = {
- SISDMAP1(vabdd, f64, aarch64_neon_vabd, ScalarRet),
- SISDMAP1(vabds, f32, aarch64_neon_vabd, ScalarRet),
+ SISDMAP1(vabdd, f64, aarch64_neon_vabd, AddRetType),
+ SISDMAP1(vabds, f32, aarch64_neon_vabd, AddRetType),
SISDMAP1(vabsd, s64, aarch64_neon_vabs, 0),
SISDMAP1(vaddd, s64, aarch64_neon_vaddds, 0),
SISDMAP1(vaddd, u64, aarch64_neon_vadddu, 0),
- SISDMAP1(vaddlv, s16, aarch64_neon_saddlv, VectorRet | VectorCastArg1),
- SISDMAP1(vaddlv, s32, aarch64_neon_saddlv, VectorRet | VectorCastArg1),
- SISDMAP1(vaddlv, s8, aarch64_neon_saddlv, VectorRet | VectorCastArg1),
- SISDMAP1(vaddlv, u16, aarch64_neon_uaddlv, VectorRet | VectorCastArg1),
- SISDMAP1(vaddlv, u32, aarch64_neon_uaddlv, VectorRet | VectorCastArg1),
- SISDMAP1(vaddlv, u8, aarch64_neon_uaddlv, VectorRet | VectorCastArg1),
- SISDMAP1(vaddlvq, s16, aarch64_neon_saddlv, VectorRet | VectorCastArg1),
- SISDMAP1(vaddlvq, s32, aarch64_neon_saddlv, VectorRet | VectorCastArg1),
- SISDMAP1(vaddlvq, s8, aarch64_neon_saddlv, VectorRet | VectorCastArg1),
- SISDMAP1(vaddlvq, u16, aarch64_neon_uaddlv, VectorRet | VectorCastArg1),
- SISDMAP1(vaddlvq, u32, aarch64_neon_uaddlv, VectorRet | VectorCastArg1),
- SISDMAP1(vaddlvq, u8, aarch64_neon_uaddlv, VectorRet | VectorCastArg1),
- SISDMAP1(vaddv, f32, aarch64_neon_vpfadd, ScalarRet | VectorCastArg0),
- SISDMAP1(vaddv, s16, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
- SISDMAP1(vaddv, s32, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
- SISDMAP1(vaddv, s8, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
- SISDMAP1(vaddv, u16, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
- SISDMAP1(vaddv, u32, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
- SISDMAP1(vaddv, u8, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
- SISDMAP1(vaddvq, f32, aarch64_neon_vpfadd, ScalarRet | VectorCastArg0),
- SISDMAP1(vaddvq, f64, aarch64_neon_vpfadd, ScalarRet | VectorCastArg0),
- SISDMAP1(vaddvq, s16, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
- SISDMAP1(vaddvq, s32, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
- SISDMAP1(vaddvq, s64, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
- SISDMAP1(vaddvq, s8, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
- SISDMAP1(vaddvq, u16, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
- SISDMAP1(vaddvq, u32, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
- SISDMAP1(vaddvq, u64, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
- SISDMAP1(vaddvq, u8, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
- SISDMAP1(vcaged, f64, aarch64_neon_fcage, VectorRet | ScalarArg0 | ScalarArg1),
- SISDMAP1(vcages, f32, aarch64_neon_fcage, VectorRet | ScalarArg0 | ScalarArg1),
- SISDMAP1(vcagtd, f64, aarch64_neon_fcagt, VectorRet | ScalarArg0 | ScalarArg1),
- SISDMAP1(vcagts, f32, aarch64_neon_fcagt, VectorRet | ScalarArg0 | ScalarArg1),
- SISDMAP1(vcaled, f64, aarch64_neon_fcage, VectorRet | ScalarArg0 | ScalarArg1),
- SISDMAP1(vcales, f32, aarch64_neon_fcage, VectorRet | ScalarArg0 | ScalarArg1),
- SISDMAP1(vcaltd, f64, aarch64_neon_fcagt, VectorRet | ScalarArg0 | ScalarArg1),
- SISDMAP1(vcalts, f32, aarch64_neon_fcagt, VectorRet | ScalarArg0 | ScalarArg1),
- SISDMAP1(vceqd, f64, aarch64_neon_fceq, VectorRet | ScalarArg0 | ScalarArg1),
+ SISDMAP1(vaddlv, s16, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+ SISDMAP1(vaddlv, s32, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+ SISDMAP1(vaddlv, s8, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+ SISDMAP1(vaddlv, u16, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+ SISDMAP1(vaddlv, u32, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+ SISDMAP1(vaddlv, u8, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+ SISDMAP1(vaddlvq, s16, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+ SISDMAP1(vaddlvq, s32, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+ SISDMAP1(vaddlvq, s8, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+ SISDMAP1(vaddlvq, u16, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+ SISDMAP1(vaddlvq, u32, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+ SISDMAP1(vaddlvq, u8, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+ SISDMAP1(vaddv, f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
+ SISDMAP1(vaddv, s16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ SISDMAP1(vaddv, s32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ SISDMAP1(vaddv, s8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ SISDMAP1(vaddv, u16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ SISDMAP1(vaddv, u32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ SISDMAP1(vaddv, u8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ SISDMAP1(vaddvq, f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
+ SISDMAP1(vaddvq, f64, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
+ SISDMAP1(vaddvq, s16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ SISDMAP1(vaddvq, s32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ SISDMAP1(vaddvq, s64, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ SISDMAP1(vaddvq, s8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ SISDMAP1(vaddvq, u16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ SISDMAP1(vaddvq, u32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ SISDMAP1(vaddvq, u64, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ SISDMAP1(vaddvq, u8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ SISDMAP1(vcaged, f64, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
+ SISDMAP1(vcages, f32, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
+ SISDMAP1(vcagtd, f64, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
+ SISDMAP1(vcagts, f32, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
+ SISDMAP1(vcaled, f64, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
+ SISDMAP1(vcales, f32, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
+ SISDMAP1(vcaltd, f64, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
+ SISDMAP1(vcalts, f32, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
+ SISDMAP1(vceqd, f64, aarch64_neon_fceq, VectorRet | Add2ArgTypes),
SISDMAP1(vceqd, s64, aarch64_neon_vceq, VectorRetGetArgs01),
SISDMAP1(vceqd, u64, aarch64_neon_vceq, VectorRetGetArgs01),
- SISDMAP1(vceqs, f32, aarch64_neon_fceq, VectorRet | ScalarArg0 | ScalarArg1),
+ SISDMAP1(vceqs, f32, aarch64_neon_fceq, VectorRet | Add2ArgTypes),
SISDMAP1(vceqzd, f64, aarch64_neon_fceq, FpCmpzModifiers),
SISDMAP1(vceqzd, s64, aarch64_neon_vceq, VectorRetGetArgs01),
SISDMAP1(vceqzd, u64, aarch64_neon_vceq, VectorRetGetArgs01),
SISDMAP1(vceqzs, f32, aarch64_neon_fceq, FpCmpzModifiers),
- SISDMAP1(vcged, f64, aarch64_neon_fcge, VectorRet | ScalarArg0 | ScalarArg1),
+ SISDMAP1(vcged, f64, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
SISDMAP1(vcged, s64, aarch64_neon_vcge, VectorRetGetArgs01),
SISDMAP1(vcged, u64, aarch64_neon_vchs, VectorRetGetArgs01),
- SISDMAP1(vcges, f32, aarch64_neon_fcge, VectorRet | ScalarArg0 | ScalarArg1),
+ SISDMAP1(vcges, f32, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
SISDMAP1(vcgezd, f64, aarch64_neon_fcge, FpCmpzModifiers),
SISDMAP1(vcgezd, s64, aarch64_neon_vcge, VectorRetGetArgs01),
SISDMAP1(vcgezs, f32, aarch64_neon_fcge, FpCmpzModifiers),
- SISDMAP1(vcgtd, f64, aarch64_neon_fcgt, VectorRet | ScalarArg0 | ScalarArg1),
+ SISDMAP1(vcgtd, f64, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
SISDMAP1(vcgtd, s64, aarch64_neon_vcgt, VectorRetGetArgs01),
SISDMAP1(vcgtd, u64, aarch64_neon_vchi, VectorRetGetArgs01),
- SISDMAP1(vcgts, f32, aarch64_neon_fcgt, VectorRet | ScalarArg0 | ScalarArg1),
+ SISDMAP1(vcgts, f32, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
SISDMAP1(vcgtzd, f64, aarch64_neon_fcgt, FpCmpzModifiers),
SISDMAP1(vcgtzd, s64, aarch64_neon_vcgt, VectorRetGetArgs01),
SISDMAP1(vcgtzs, f32, aarch64_neon_fcgt, FpCmpzModifiers),
- SISDMAP1(vcled, f64, aarch64_neon_fcge, VectorRet | ScalarArg0 | ScalarArg1),
+ SISDMAP1(vcled, f64, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
SISDMAP1(vcled, s64, aarch64_neon_vcge, VectorRetGetArgs01),
SISDMAP1(vcled, u64, aarch64_neon_vchs, VectorRetGetArgs01),
- SISDMAP1(vcles, f32, aarch64_neon_fcge, VectorRet | ScalarArg0 | ScalarArg1),
+ SISDMAP1(vcles, f32, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
SISDMAP1(vclezd, f64, aarch64_neon_fclez, FpCmpzModifiers),
SISDMAP1(vclezd, s64, aarch64_neon_vclez, VectorRetGetArgs01),
SISDMAP1(vclezs, f32, aarch64_neon_fclez, FpCmpzModifiers),
- SISDMAP1(vcltd, f64, aarch64_neon_fcgt, VectorRet | ScalarArg0 | ScalarArg1),
+ SISDMAP1(vcltd, f64, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
SISDMAP1(vcltd, s64, aarch64_neon_vcgt, VectorRetGetArgs01),
SISDMAP1(vcltd, u64, aarch64_neon_vchi, VectorRetGetArgs01),
- SISDMAP1(vclts, f32, aarch64_neon_fcgt, VectorRet | ScalarArg0 | ScalarArg1),
+ SISDMAP1(vclts, f32, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
SISDMAP1(vcltzd, f64, aarch64_neon_fcltz, FpCmpzModifiers),
SISDMAP1(vcltzd, s64, aarch64_neon_vcltz, VectorRetGetArgs01),
SISDMAP1(vcltzs, f32, aarch64_neon_fcltz, FpCmpzModifiers),
- SISDMAP1(vcvtad_s64, f64, aarch64_neon_fcvtas, VectorRet | ScalarArg1),
- SISDMAP1(vcvtad_u64, f64, aarch64_neon_fcvtau, VectorRet | ScalarArg1),
- SISDMAP1(vcvtas_s32, f32, aarch64_neon_fcvtas, VectorRet | ScalarArg1),
- SISDMAP1(vcvtas_u32, f32, aarch64_neon_fcvtau, VectorRet | ScalarArg1),
- SISDMAP1(vcvtd_f64, s64, aarch64_neon_vcvtint2fps, ScalarRet | VectorGetArg0),
- SISDMAP1(vcvtd_f64, u64, aarch64_neon_vcvtint2fpu, ScalarRet | VectorGetArg0),
- SISDMAP1(vcvtd_n_f64, s64, aarch64_neon_vcvtfxs2fp_n, ScalarRet | VectorGetArg0),
- SISDMAP1(vcvtd_n_f64, u64, aarch64_neon_vcvtfxu2fp_n, ScalarRet | VectorGetArg0),
- SISDMAP1(vcvtd_n_s64, f64, aarch64_neon_vcvtfp2fxs_n, VectorRet | ScalarArg0),
- SISDMAP1(vcvtd_n_u64, f64, aarch64_neon_vcvtfp2fxu_n, VectorRet | ScalarArg0),
- SISDMAP1(vcvtd_s64, f64, aarch64_neon_fcvtzs, VectorRet | ScalarArg1),
- SISDMAP1(vcvtd_u64, f64, aarch64_neon_fcvtzu, VectorRet | ScalarArg1),
- SISDMAP1(vcvtmd_s64, f64, aarch64_neon_fcvtms, VectorRet | ScalarArg1),
- SISDMAP1(vcvtmd_u64, f64, aarch64_neon_fcvtmu, VectorRet | ScalarArg1),
- SISDMAP1(vcvtms_s32, f32, aarch64_neon_fcvtms, VectorRet | ScalarArg1),
- SISDMAP1(vcvtms_u32, f32, aarch64_neon_fcvtmu, VectorRet | ScalarArg1),
- SISDMAP1(vcvtnd_s64, f64, aarch64_neon_fcvtns, VectorRet | ScalarArg1),
- SISDMAP1(vcvtnd_u64, f64, aarch64_neon_fcvtnu, VectorRet | ScalarArg1),
- SISDMAP1(vcvtns_s32, f32, aarch64_neon_fcvtns, VectorRet | ScalarArg1),
- SISDMAP1(vcvtns_u32, f32, aarch64_neon_fcvtnu, VectorRet | ScalarArg1),
- SISDMAP1(vcvtpd_s64, f64, aarch64_neon_fcvtps, VectorRet | ScalarArg1),
- SISDMAP1(vcvtpd_u64, f64, aarch64_neon_fcvtpu, VectorRet | ScalarArg1),
- SISDMAP1(vcvtps_s32, f32, aarch64_neon_fcvtps, VectorRet | ScalarArg1),
- SISDMAP1(vcvtps_u32, f32, aarch64_neon_fcvtpu, VectorRet | ScalarArg1),
- SISDMAP1(vcvts_f32, s32, aarch64_neon_vcvtint2fps, ScalarRet | VectorGetArg0),
- SISDMAP1(vcvts_f32, u32, aarch64_neon_vcvtint2fpu, ScalarRet | VectorGetArg0),
- SISDMAP1(vcvts_n_f32, s32, aarch64_neon_vcvtfxs2fp_n, ScalarRet | VectorGetArg0),
- SISDMAP1(vcvts_n_f32, u32, aarch64_neon_vcvtfxu2fp_n, ScalarRet | VectorGetArg0),
- SISDMAP1(vcvts_n_s32, f32, aarch64_neon_vcvtfp2fxs_n, VectorRet | ScalarArg0),
- SISDMAP1(vcvts_n_u32, f32, aarch64_neon_vcvtfp2fxu_n, VectorRet | ScalarArg0),
- SISDMAP1(vcvts_s32, f32, aarch64_neon_fcvtzs, VectorRet | ScalarArg1),
- SISDMAP1(vcvts_u32, f32, aarch64_neon_fcvtzu, VectorRet | ScalarArg1),
+ SISDMAP1(vcvtad_s64, f64, aarch64_neon_fcvtas, VectorRet | Add1ArgType),
+ SISDMAP1(vcvtad_u64, f64, aarch64_neon_fcvtau, VectorRet | Add1ArgType),
+ SISDMAP1(vcvtas_s32, f32, aarch64_neon_fcvtas, VectorRet | Add1ArgType),
+ SISDMAP1(vcvtas_u32, f32, aarch64_neon_fcvtau, VectorRet | Add1ArgType),
+ SISDMAP1(vcvtd_f64, s64, aarch64_neon_vcvtint2fps, AddRetType | Vectorize1ArgType),
+ SISDMAP1(vcvtd_f64, u64, aarch64_neon_vcvtint2fpu, AddRetType | Vectorize1ArgType),
+ SISDMAP1(vcvtd_n_f64, s64, aarch64_neon_vcvtfxs2fp_n, AddRetType | Vectorize1ArgType),
+ SISDMAP1(vcvtd_n_f64, u64, aarch64_neon_vcvtfxu2fp_n, AddRetType | Vectorize1ArgType),
+ SISDMAP1(vcvtd_n_s64, f64, aarch64_neon_vcvtfp2fxs_n, VectorRet | Add1ArgType),
+ SISDMAP1(vcvtd_n_u64, f64, aarch64_neon_vcvtfp2fxu_n, VectorRet | Add1ArgType),
+ SISDMAP1(vcvtd_s64, f64, aarch64_neon_fcvtzs, VectorRet | Add1ArgType),
+ SISDMAP1(vcvtd_u64, f64, aarch64_neon_fcvtzu, VectorRet | Add1ArgType),
+ SISDMAP1(vcvtmd_s64, f64, aarch64_neon_fcvtms, VectorRet | Add1ArgType),
+ SISDMAP1(vcvtmd_u64, f64, aarch64_neon_fcvtmu, VectorRet | Add1ArgType),
+ SISDMAP1(vcvtms_s32, f32, aarch64_neon_fcvtms, VectorRet | Add1ArgType),
+ SISDMAP1(vcvtms_u32, f32, aarch64_neon_fcvtmu, VectorRet | Add1ArgType),
+ SISDMAP1(vcvtnd_s64, f64, aarch64_neon_fcvtns, VectorRet | Add1ArgType),
+ SISDMAP1(vcvtnd_u64, f64, aarch64_neon_fcvtnu, VectorRet | Add1ArgType),
+ SISDMAP1(vcvtns_s32, f32, aarch64_neon_fcvtns, VectorRet | Add1ArgType),
+ SISDMAP1(vcvtns_u32, f32, aarch64_neon_fcvtnu, VectorRet | Add1ArgType),
+ SISDMAP1(vcvtpd_s64, f64, aarch64_neon_fcvtps, VectorRet | Add1ArgType),
+ SISDMAP1(vcvtpd_u64, f64, aarch64_neon_fcvtpu, VectorRet | Add1ArgType),
+ SISDMAP1(vcvtps_s32, f32, aarch64_neon_fcvtps, VectorRet | Add1ArgType),
+ SISDMAP1(vcvtps_u32, f32, aarch64_neon_fcvtpu, VectorRet | Add1ArgType),
+ SISDMAP1(vcvts_f32, s32, aarch64_neon_vcvtint2fps, AddRetType | Vectorize1ArgType),
+ SISDMAP1(vcvts_f32, u32, aarch64_neon_vcvtint2fpu, AddRetType | Vectorize1ArgType),
+ SISDMAP1(vcvts_n_f32, s32, aarch64_neon_vcvtfxs2fp_n, AddRetType | Vectorize1ArgType),
+ SISDMAP1(vcvts_n_f32, u32, aarch64_neon_vcvtfxu2fp_n, AddRetType | Vectorize1ArgType),
+ SISDMAP1(vcvts_n_s32, f32, aarch64_neon_vcvtfp2fxs_n, VectorRet | Add1ArgType),
+ SISDMAP1(vcvts_n_u32, f32, aarch64_neon_vcvtfp2fxu_n, VectorRet | Add1ArgType),
+ SISDMAP1(vcvts_s32, f32, aarch64_neon_fcvtzs, VectorRet | Add1ArgType),
+ SISDMAP1(vcvts_u32, f32, aarch64_neon_fcvtzu, VectorRet | Add1ArgType),
SISDMAP1(vcvtxd_f32, f64, aarch64_neon_fcvtxn, 0),
SISDMAP0(vdupb_lane, i8),
SISDMAP0(vdupb_laneq, i8),
SISDMAP0(vgetq_lane, i32),
SISDMAP0(vgetq_lane, i64),
SISDMAP0(vgetq_lane, i8),
- SISDMAP1(vmaxnmv, f32, aarch64_neon_vpfmaxnm, ScalarRet | VectorCastArg0),
+ SISDMAP1(vmaxnmv, f32, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
SISDMAP1(vmaxnmvq, f32, aarch64_neon_vmaxnmv, 0),
- SISDMAP1(vmaxnmvq, f64, aarch64_neon_vpfmaxnm, ScalarRet | VectorCastArg0),
- SISDMAP1(vmaxv, f32, aarch64_neon_vpmax, ScalarRet | VectorCastArg0),
- SISDMAP1(vmaxv, s16, aarch64_neon_smaxv, VectorRet | VectorCastArg1),
- SISDMAP1(vmaxv, s32, aarch64_neon_smaxv, VectorRet | VectorCastArg1),
- SISDMAP1(vmaxv, s8, aarch64_neon_smaxv, VectorRet | VectorCastArg1),
- SISDMAP1(vmaxv, u16, aarch64_neon_umaxv, VectorRet | VectorCastArg1),
- SISDMAP1(vmaxv, u32, aarch64_neon_umaxv, VectorRet | VectorCastArg1),
- SISDMAP1(vmaxv, u8, aarch64_neon_umaxv, VectorRet | VectorCastArg1),
+ SISDMAP1(vmaxnmvq, f64, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
+ SISDMAP1(vmaxv, f32, aarch64_neon_vpmax, AddRetType | Add1ArgType),
+ SISDMAP1(vmaxv, s16, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+ SISDMAP1(vmaxv, s32, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+ SISDMAP1(vmaxv, s8, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+ SISDMAP1(vmaxv, u16, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+ SISDMAP1(vmaxv, u32, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+ SISDMAP1(vmaxv, u8, aarch64_neon_umaxv, VectorRet | Add1ArgType),
SISDMAP1(vmaxvq, f32, aarch64_neon_vmaxv, 0),
- SISDMAP1(vmaxvq, f64, aarch64_neon_vpmax, ScalarRet | VectorCastArg0),
- SISDMAP1(vmaxvq, s16, aarch64_neon_smaxv, VectorRet | VectorCastArg1),
- SISDMAP1(vmaxvq, s32, aarch64_neon_smaxv, VectorRet | VectorCastArg1),
- SISDMAP1(vmaxvq, s8, aarch64_neon_smaxv, VectorRet | VectorCastArg1),
- SISDMAP1(vmaxvq, u16, aarch64_neon_umaxv, VectorRet | VectorCastArg1),
- SISDMAP1(vmaxvq, u32, aarch64_neon_umaxv, VectorRet | VectorCastArg1),
- SISDMAP1(vmaxvq, u8, aarch64_neon_umaxv, VectorRet | VectorCastArg1),
- SISDMAP1(vminnmv, f32, aarch64_neon_vpfminnm, ScalarRet | VectorCastArg0),
+ SISDMAP1(vmaxvq, f64, aarch64_neon_vpmax, AddRetType | Add1ArgType),
+ SISDMAP1(vmaxvq, s16, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+ SISDMAP1(vmaxvq, s32, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+ SISDMAP1(vmaxvq, s8, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+ SISDMAP1(vmaxvq, u16, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+ SISDMAP1(vmaxvq, u32, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+ SISDMAP1(vmaxvq, u8, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+ SISDMAP1(vminnmv, f32, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
SISDMAP1(vminnmvq, f32, aarch64_neon_vminnmv, 0),
- SISDMAP1(vminnmvq, f64, aarch64_neon_vpfminnm, ScalarRet | VectorCastArg0),
- SISDMAP1(vminv, f32, aarch64_neon_vpmin, ScalarRet | VectorCastArg0),
- SISDMAP1(vminv, s16, aarch64_neon_sminv, VectorRet | VectorCastArg1),
- SISDMAP1(vminv, s32, aarch64_neon_sminv, VectorRet | VectorCastArg1),
- SISDMAP1(vminv, s8, aarch64_neon_sminv, VectorRet | VectorCastArg1),
- SISDMAP1(vminv, u16, aarch64_neon_uminv, VectorRet | VectorCastArg1),
- SISDMAP1(vminv, u32, aarch64_neon_uminv, VectorRet | VectorCastArg1),
- SISDMAP1(vminv, u8, aarch64_neon_uminv, VectorRet | VectorCastArg1),
+ SISDMAP1(vminnmvq, f64, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
+ SISDMAP1(vminv, f32, aarch64_neon_vpmin, AddRetType | Add1ArgType),
+ SISDMAP1(vminv, s16, aarch64_neon_sminv, VectorRet | Add1ArgType),
+ SISDMAP1(vminv, s32, aarch64_neon_sminv, VectorRet | Add1ArgType),
+ SISDMAP1(vminv, s8, aarch64_neon_sminv, VectorRet | Add1ArgType),
+ SISDMAP1(vminv, u16, aarch64_neon_uminv, VectorRet | Add1ArgType),
+ SISDMAP1(vminv, u32, aarch64_neon_uminv, VectorRet | Add1ArgType),
+ SISDMAP1(vminv, u8, aarch64_neon_uminv, VectorRet | Add1ArgType),
SISDMAP1(vminvq, f32, aarch64_neon_vminv, 0),
- SISDMAP1(vminvq, f64, aarch64_neon_vpmin, ScalarRet | VectorCastArg0),
- SISDMAP1(vminvq, s16, aarch64_neon_sminv, VectorRet | VectorCastArg1),
- SISDMAP1(vminvq, s32, aarch64_neon_sminv, VectorRet | VectorCastArg1),
- SISDMAP1(vminvq, s8, aarch64_neon_sminv, VectorRet | VectorCastArg1),
- SISDMAP1(vminvq, u16, aarch64_neon_uminv, VectorRet | VectorCastArg1),
- SISDMAP1(vminvq, u32, aarch64_neon_uminv, VectorRet | VectorCastArg1),
- SISDMAP1(vminvq, u8, aarch64_neon_uminv, VectorRet | VectorCastArg1),
+ SISDMAP1(vminvq, f64, aarch64_neon_vpmin, AddRetType | Add1ArgType),
+ SISDMAP1(vminvq, s16, aarch64_neon_sminv, VectorRet | Add1ArgType),
+ SISDMAP1(vminvq, s32, aarch64_neon_sminv, VectorRet | Add1ArgType),
+ SISDMAP1(vminvq, s8, aarch64_neon_sminv, VectorRet | Add1ArgType),
+ SISDMAP1(vminvq, u16, aarch64_neon_uminv, VectorRet | Add1ArgType),
+ SISDMAP1(vminvq, u32, aarch64_neon_uminv, VectorRet | Add1ArgType),
+ SISDMAP1(vminvq, u8, aarch64_neon_uminv, VectorRet | Add1ArgType),
SISDMAP0(vmul_n, f64),
SISDMAP1(vmull, p64, aarch64_neon_vmull_p64, 0),
SISDMAP0(vmulxd, f64),
SISDMAP0(vmulxs, f32),
SISDMAP1(vnegd, s64, aarch64_neon_vneg, 0),
- SISDMAP1(vpaddd, f64, aarch64_neon_vpfadd, ScalarRet | VectorCastArg0),
+ SISDMAP1(vpaddd, f64, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
SISDMAP1(vpaddd, s64, aarch64_neon_vpadd, 0),
SISDMAP1(vpaddd, u64, aarch64_neon_vpadd, 0),
- SISDMAP1(vpadds, f32, aarch64_neon_vpfadd, ScalarRet | VectorCastArg0),
- SISDMAP1(vpmaxnmqd, f64, aarch64_neon_vpfmaxnm, ScalarRet | VectorCastArg0),
- SISDMAP1(vpmaxnms, f32, aarch64_neon_vpfmaxnm, ScalarRet | VectorCastArg0),
- SISDMAP1(vpmaxqd, f64, aarch64_neon_vpmax, ScalarRet | VectorCastArg0),
- SISDMAP1(vpmaxs, f32, aarch64_neon_vpmax, ScalarRet | VectorCastArg0),
- SISDMAP1(vpminnmqd, f64, aarch64_neon_vpfminnm, ScalarRet | VectorCastArg0),
- SISDMAP1(vpminnms, f32, aarch64_neon_vpfminnm, ScalarRet | VectorCastArg0),
- SISDMAP1(vpminqd, f64, aarch64_neon_vpmin, ScalarRet | VectorCastArg0),
- SISDMAP1(vpmins, f32, aarch64_neon_vpmin, ScalarRet | VectorCastArg0),
+ SISDMAP1(vpadds, f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
+ SISDMAP1(vpmaxnmqd, f64, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
+ SISDMAP1(vpmaxnms, f32, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
+ SISDMAP1(vpmaxqd, f64, aarch64_neon_vpmax, AddRetType | Add1ArgType),
+ SISDMAP1(vpmaxs, f32, aarch64_neon_vpmax, AddRetType | Add1ArgType),
+ SISDMAP1(vpminnmqd, f64, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
+ SISDMAP1(vpminnms, f32, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
+ SISDMAP1(vpminqd, f64, aarch64_neon_vpmin, AddRetType | Add1ArgType),
+ SISDMAP1(vpmins, f32, aarch64_neon_vpmin, AddRetType | Add1ArgType),
SISDMAP1(vqabsb, s8, arm_neon_vqabs, VectorRet),
SISDMAP1(vqabsd, s64, arm_neon_vqabs, VectorRet),
SISDMAP1(vqabsh, s16, arm_neon_vqabs, VectorRet),
SISDMAP1(vqsubh, u16, arm_neon_vqsubu, VectorRet),
SISDMAP1(vqsubs, s32, arm_neon_vqsubs, VectorRet),
SISDMAP1(vqsubs, u32, arm_neon_vqsubu, VectorRet),
- SISDMAP1(vrecped, f64, aarch64_neon_vrecpe, ScalarRet),
- SISDMAP1(vrecpes, f32, aarch64_neon_vrecpe, ScalarRet),
- SISDMAP1(vrecpsd, f64, aarch64_neon_vrecps, ScalarRet),
- SISDMAP1(vrecpss, f32, aarch64_neon_vrecps, ScalarRet),
- SISDMAP1(vrecpxd, f64, aarch64_neon_vrecpx, ScalarRet),
- SISDMAP1(vrecpxs, f32, aarch64_neon_vrecpx, ScalarRet),
+ SISDMAP1(vrecped, f64, aarch64_neon_vrecpe, AddRetType),
+ SISDMAP1(vrecpes, f32, aarch64_neon_vrecpe, AddRetType),
+ SISDMAP1(vrecpsd, f64, aarch64_neon_vrecps, AddRetType),
+ SISDMAP1(vrecpss, f32, aarch64_neon_vrecps, AddRetType),
+ SISDMAP1(vrecpxd, f64, aarch64_neon_vrecpx, AddRetType),
+ SISDMAP1(vrecpxs, f32, aarch64_neon_vrecpx, AddRetType),
SISDMAP1(vrshld, s64, aarch64_neon_vrshlds, 0),
SISDMAP1(vrshld, u64, aarch64_neon_vrshldu, 0),
SISDMAP1(vrshrd_n, s64, aarch64_neon_vsrshr, VectorRet),
SISDMAP1(vrshrd_n, u64, aarch64_neon_vurshr, VectorRet),
- SISDMAP1(vrsqrted, f64, aarch64_neon_vrsqrte, ScalarRet),
- SISDMAP1(vrsqrtes, f32, aarch64_neon_vrsqrte, ScalarRet),
- SISDMAP1(vrsqrtsd, f64, aarch64_neon_vrsqrts, ScalarRet),
- SISDMAP1(vrsqrtss, f32, aarch64_neon_vrsqrts, ScalarRet),
+ SISDMAP1(vrsqrted, f64, aarch64_neon_vrsqrte, AddRetType),
+ SISDMAP1(vrsqrtes, f32, aarch64_neon_vrsqrte, AddRetType),
+ SISDMAP1(vrsqrtsd, f64, aarch64_neon_vrsqrts, AddRetType),
+ SISDMAP1(vrsqrtss, f32, aarch64_neon_vrsqrts, AddRetType),
SISDMAP1(vrsrad_n, s64, aarch64_neon_vrsrads_n, 0),
SISDMAP1(vrsrad_n, u64, aarch64_neon_vrsradu_n, 0),
SISDMAP0(vset_lane, f32),
static bool AArch64SISDIntrinsicInfoProvenSorted = false;
#endif
+Function *CodeGenFunction::LookupNeonIntrinsic(unsigned IntrinsicID, // Collects the overload types selected by Modifier and returns CGM.getIntrinsic(IntrinsicID, Tys).
+ unsigned Modifier, // bitmask of AddRetType / Add1ArgType / Add2ArgTypes / Vectorize* / InventFloatType
+ llvm::Type *ArgType, // type used for every argument entry added below
+ const CallExpr *E) { // call expression; only its return type is read here
+ // Return type: added first, and only when AddRetType is set.
+ SmallVector<llvm::Type *, 3> Tys;
+ if (Modifier & AddRetType) {
+ llvm::Type *Ty = ConvertType(E->getCallReturnType());
+ if (Modifier & VectorizeRetType)
+ Ty = llvm::VectorType::get(Ty, 1); // wrap the return type in a 1-element vector
+
+ Tys.push_back(Ty);
+ }
+
+ // Arguments: every entry added here uses the same (possibly vectorized) ArgType.
+ if (Modifier & VectorizeArgTypes)
+ ArgType = llvm::VectorType::get(ArgType, 1); // wrap ArgType in a 1-element vector
+
+ if (Modifier & (Add1ArgType | Add2ArgTypes)) // first argument entry, for either flag
+ Tys.push_back(ArgType);
+
+ if (Modifier & Add2ArgTypes) // second argument entry
+ Tys.push_back(ArgType);
+
+ if (Modifier & InventFloatType) // set via FpCmpzModifiers; presumably stands in for an implicit zero operand - confirm
+ Tys.push_back(FloatTy);
+
+ return CGM.getIntrinsic(IntrinsicID, Tys); // Tys may be empty when Modifier is 0
+}
+
static Value *
EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
const NeonSISDIntrinsicInfo &SISDInfo,
return 0;
// Determine the type(s) of this overloaded AArch64 intrinsic.
- Function *F = 0;
- SmallVector<llvm::Type *, 3> Tys;
-
- // Return type.
- if (IntTypes & (ScalarRet | VectorRet)) {
- llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
- if (IntTypes & ScalarRet) {
- // Scalar return value.
- Tys.push_back(Ty);
- } else if (IntTypes & VectorRet) {
- // Convert the scalar return type to one-vector element type.
- Tys.push_back(llvm::VectorType::get(Ty, 1));
- }
- }
-
- // Arguments.
- if (IntTypes & (ScalarArg0 | VectorGetArg0 | VectorCastArg0)) {
- const Expr *Arg = E->getArg(0);
- llvm::Type *Ty = CGF.ConvertType(Arg->getType());
- if (IntTypes & ScalarArg0) {
- // Scalar argument.
- Tys.push_back(Ty);
- } else if (IntTypes & VectorGetArg0) {
- // Convert the scalar argument to one-vector element type.
- Tys.push_back(llvm::VectorType::get(Ty, 1));
- } else if (IntTypes & VectorCastArg0) {
- // Cast the argument to vector type.
- Tys.push_back(cast<llvm::VectorType>(Ty));
- }
- }
-
- // The only intrinsics that require a 2nd argument are the compare intrinsics.
- // However, the builtins don't always have a 2nd argument (e.g.,
- // floating-point compare to zero), so we inspect the first argument to
- // determine the type.
- if (IntTypes & (ScalarArg1 | VectorGetArg1 | VectorCastArg1)) {
- const Expr *Arg = E->getArg(0);
- llvm::Type *Ty = CGF.ConvertType(Arg->getType());
- if (IntTypes & ScalarArg1) {
- // Scalar argument.
- Tys.push_back(Ty);
- } else if (IntTypes & VectorGetArg1) {
- // Convert the scalar argument to one-vector element type.
- Tys.push_back(llvm::VectorType::get(Ty, 1));
- } else if (IntTypes & VectorCastArg1) {
- // Cast the argument to a vector type.
- Tys.push_back(cast<llvm::VectorType>(Ty));
- }
- } else if (IntTypes & ScalarFpCmpzArg1) {
- // Floating-point zero argument.
- Tys.push_back(CGF.FloatTy);
- }
-
- if (IntTypes)
- F = CGF.CGM.getIntrinsic(Int, Tys);
- else
- F = CGF.CGM.getIntrinsic(Int);
+ const Expr *Arg = E->getArg(0);
+ llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
+ Function *F = CGF.LookupNeonIntrinsic(Int, IntTypes, ArgTy, E);
Value *Result = CGF.EmitNeonCall(F, Ops, s);
llvm::Type *ResultType = CGF.ConvertType(E->getType());