From a67b8f1151aa6e3c60bc99314f6eae4547b7e8f0 Mon Sep 17 00:00:00 2001
From: Tim Northover <tnorthover@apple.com>
Date: Fri, 21 Feb 2014 11:57:20 +0000
Subject: [PATCH] AArch64: refactor table-driven NEON lookup.

This extracts the table-driven intrinsic lookup phase into a separate
function, to be used by EmitCommonNeonBuiltinExpr soon. It also
simplifies the logic used in that lookup, since VectorCastArgN and
ScalarArgN were actually identical.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@201847 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/CGBuiltin.cpp     | 387 ++++++++++++++++------------------
 lib/CodeGen/CodeGenFunction.h |   2 +
 2 files changed, 185 insertions(+), 204 deletions(-)

diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index defa071662..9558acba8c 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -2448,22 +2448,25 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(unsigned BuiltinID,
   return 0;
 }
 
-enum NeonScalarTypeMod {
-  ScalarRet = (1 << 0),
-  VectorRet = (1 << 1),
-  ScalarArg0 = (1 << 2),
-  VectorGetArg0 = (1 << 3),
-  VectorCastArg0 = (1 << 4),
-  ScalarArg1 = (1 << 5),
-  VectorGetArg1 = (1 << 6),
-  VectorCastArg1 = (1 << 7),
-  ScalarFpCmpzArg1 = (1 << 8),
-
-  VectorRetGetArgs01 = VectorRet | VectorGetArg0 | VectorGetArg1,
-  FpCmpzModifiers = VectorRet | ScalarArg0 | ScalarFpCmpzArg1
+enum {
+  AddRetType = (1 << 0),
+  Add1ArgType = (1 << 1),
+  Add2ArgTypes = (1 << 2),
+
+  VectorizeRetType = (1 << 3),
+  VectorizeArgTypes = (1 << 4),
+
+  InventFloatType = (1 << 5),
+
+  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
+  VectorRet = AddRetType | VectorizeRetType,
+  VectorRetGetArgs01 =
+      AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
+  FpCmpzModifiers =
+      AddRetType | VectorizeRetType | Add1ArgType | InventFloatType,
 };
 
-struct NeonSISDIntrinsicInfo {
+ struct NeonSISDIntrinsicInfo {
   unsigned BuiltinID;
   unsigned LLVMIntrinsic;
   const char *NameHint;
@@ -2482,116 +2485,116 @@ struct NeonSISDIntrinsicInfo {
     Intrinsic::LLVMIntrinsic, #NameBase, TypeModifier }
 
 static const NeonSISDIntrinsicInfo AArch64SISDIntrinsicInfo[] = {
-  SISDMAP1(vabdd, f64, aarch64_neon_vabd, ScalarRet),
-  SISDMAP1(vabds, f32, aarch64_neon_vabd, ScalarRet),
+  SISDMAP1(vabdd, f64, aarch64_neon_vabd, AddRetType),
+  SISDMAP1(vabds, f32, aarch64_neon_vabd, AddRetType),
   SISDMAP1(vabsd, s64, aarch64_neon_vabs, 0),
   SISDMAP1(vaddd, s64, aarch64_neon_vaddds, 0),
   SISDMAP1(vaddd, u64, aarch64_neon_vadddu, 0),
-  SISDMAP1(vaddlv, s16, aarch64_neon_saddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlv, s32, aarch64_neon_saddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlv, s8, aarch64_neon_saddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlv, u16, aarch64_neon_uaddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlv, u32, aarch64_neon_uaddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlv, u8, aarch64_neon_uaddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlvq, s16, aarch64_neon_saddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlvq, s32, aarch64_neon_saddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlvq, s8, aarch64_neon_saddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlvq, u16, aarch64_neon_uaddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlvq, u32, aarch64_neon_uaddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlvq, u8, aarch64_neon_uaddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddv, f32, aarch64_neon_vpfadd, ScalarRet | VectorCastArg0),
-  SISDMAP1(vaddv, s16, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddv, s32, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddv, s8, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddv, u16, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddv, u32, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddv, u8, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddvq, f32, aarch64_neon_vpfadd, ScalarRet | VectorCastArg0),
-  SISDMAP1(vaddvq, f64, aarch64_neon_vpfadd, ScalarRet | VectorCastArg0),
-  SISDMAP1(vaddvq, s16, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddvq, s32, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddvq, s64, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddvq, s8, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddvq, u16, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddvq, u32, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddvq, u64, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddvq, u8, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vcaged, f64, aarch64_neon_fcage, VectorRet | ScalarArg0 | ScalarArg1),
-  SISDMAP1(vcages, f32, aarch64_neon_fcage, VectorRet | ScalarArg0 | ScalarArg1),
-  SISDMAP1(vcagtd, f64, aarch64_neon_fcagt, VectorRet | ScalarArg0 | ScalarArg1),
-  SISDMAP1(vcagts, f32, aarch64_neon_fcagt, VectorRet | ScalarArg0 | ScalarArg1),
-  SISDMAP1(vcaled, f64, aarch64_neon_fcage, VectorRet | ScalarArg0 | ScalarArg1),
-  SISDMAP1(vcales, f32, aarch64_neon_fcage, VectorRet | ScalarArg0 | ScalarArg1),
-  SISDMAP1(vcaltd, f64, aarch64_neon_fcagt, VectorRet | ScalarArg0 | ScalarArg1),
-  SISDMAP1(vcalts, f32, aarch64_neon_fcagt, VectorRet | ScalarArg0 | ScalarArg1),
-  SISDMAP1(vceqd, f64, aarch64_neon_fceq, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vaddlv, s16, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlv, s32, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlv, s8, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlv, u16, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlv, u32, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlv, u8, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlvq, s16, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlvq, s32, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlvq, s8, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlvq, u16, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlvq, u32, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlvq, u8, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddv, f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
+  SISDMAP1(vaddv, s16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddv, s32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddv, s8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddv, u16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddv, u32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddv, u8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddvq, f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
+  SISDMAP1(vaddvq, f64, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
+  SISDMAP1(vaddvq, s16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddvq, s32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddvq, s64, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddvq, s8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddvq, u16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddvq, u32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddvq, u64, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddvq, u8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vcaged, f64, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
+  SISDMAP1(vcages, f32, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
+  SISDMAP1(vcagtd, f64, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
+  SISDMAP1(vcagts, f32, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
+  SISDMAP1(vcaled, f64, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
+  SISDMAP1(vcales, f32, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
+  SISDMAP1(vcaltd, f64, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
+  SISDMAP1(vcalts, f32, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
+  SISDMAP1(vceqd, f64, aarch64_neon_fceq, VectorRet | Add2ArgTypes),
   SISDMAP1(vceqd, s64, aarch64_neon_vceq, VectorRetGetArgs01),
   SISDMAP1(vceqd, u64, aarch64_neon_vceq, VectorRetGetArgs01),
-  SISDMAP1(vceqs, f32, aarch64_neon_fceq, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vceqs, f32, aarch64_neon_fceq, VectorRet | Add2ArgTypes),
   SISDMAP1(vceqzd, f64, aarch64_neon_fceq, FpCmpzModifiers),
   SISDMAP1(vceqzd, s64, aarch64_neon_vceq, VectorRetGetArgs01),
   SISDMAP1(vceqzd, u64, aarch64_neon_vceq, VectorRetGetArgs01),
   SISDMAP1(vceqzs, f32, aarch64_neon_fceq, FpCmpzModifiers),
-  SISDMAP1(vcged, f64, aarch64_neon_fcge, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vcged, f64, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
   SISDMAP1(vcged, s64, aarch64_neon_vcge, VectorRetGetArgs01),
   SISDMAP1(vcged, u64, aarch64_neon_vchs, VectorRetGetArgs01),
-  SISDMAP1(vcges, f32, aarch64_neon_fcge, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vcges, f32, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
   SISDMAP1(vcgezd, f64, aarch64_neon_fcge, FpCmpzModifiers),
   SISDMAP1(vcgezd, s64, aarch64_neon_vcge, VectorRetGetArgs01),
   SISDMAP1(vcgezs, f32, aarch64_neon_fcge, FpCmpzModifiers),
-  SISDMAP1(vcgtd, f64, aarch64_neon_fcgt, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vcgtd, f64, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
   SISDMAP1(vcgtd, s64, aarch64_neon_vcgt, VectorRetGetArgs01),
   SISDMAP1(vcgtd, u64, aarch64_neon_vchi, VectorRetGetArgs01),
-  SISDMAP1(vcgts, f32, aarch64_neon_fcgt, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vcgts, f32, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
   SISDMAP1(vcgtzd, f64, aarch64_neon_fcgt, FpCmpzModifiers),
   SISDMAP1(vcgtzd, s64, aarch64_neon_vcgt, VectorRetGetArgs01),
   SISDMAP1(vcgtzs, f32, aarch64_neon_fcgt, FpCmpzModifiers),
-  SISDMAP1(vcled, f64, aarch64_neon_fcge, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vcled, f64, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
   SISDMAP1(vcled, s64, aarch64_neon_vcge, VectorRetGetArgs01),
   SISDMAP1(vcled, u64, aarch64_neon_vchs, VectorRetGetArgs01),
-  SISDMAP1(vcles, f32, aarch64_neon_fcge, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vcles, f32, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
   SISDMAP1(vclezd, f64, aarch64_neon_fclez, FpCmpzModifiers),
   SISDMAP1(vclezd, s64, aarch64_neon_vclez, VectorRetGetArgs01),
   SISDMAP1(vclezs, f32, aarch64_neon_fclez, FpCmpzModifiers),
-  SISDMAP1(vcltd, f64, aarch64_neon_fcgt, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vcltd, f64, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
   SISDMAP1(vcltd, s64, aarch64_neon_vcgt, VectorRetGetArgs01),
   SISDMAP1(vcltd, u64, aarch64_neon_vchi, VectorRetGetArgs01),
-  SISDMAP1(vclts, f32, aarch64_neon_fcgt, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vclts, f32, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
   SISDMAP1(vcltzd, f64, aarch64_neon_fcltz, FpCmpzModifiers),
   SISDMAP1(vcltzd, s64, aarch64_neon_vcltz, VectorRetGetArgs01),
   SISDMAP1(vcltzs, f32, aarch64_neon_fcltz, FpCmpzModifiers),
-  SISDMAP1(vcvtad_s64, f64, aarch64_neon_fcvtas, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtad_u64, f64, aarch64_neon_fcvtau, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtas_s32, f32, aarch64_neon_fcvtas, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtas_u32, f32, aarch64_neon_fcvtau, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtd_f64, s64, aarch64_neon_vcvtint2fps, ScalarRet | VectorGetArg0),
-  SISDMAP1(vcvtd_f64, u64, aarch64_neon_vcvtint2fpu, ScalarRet | VectorGetArg0),
-  SISDMAP1(vcvtd_n_f64, s64, aarch64_neon_vcvtfxs2fp_n, ScalarRet | VectorGetArg0),
-  SISDMAP1(vcvtd_n_f64, u64, aarch64_neon_vcvtfxu2fp_n, ScalarRet | VectorGetArg0),
-  SISDMAP1(vcvtd_n_s64, f64, aarch64_neon_vcvtfp2fxs_n, VectorRet | ScalarArg0),
-  SISDMAP1(vcvtd_n_u64, f64, aarch64_neon_vcvtfp2fxu_n, VectorRet | ScalarArg0),
-  SISDMAP1(vcvtd_s64, f64, aarch64_neon_fcvtzs, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtd_u64, f64, aarch64_neon_fcvtzu, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtmd_s64, f64, aarch64_neon_fcvtms, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtmd_u64, f64, aarch64_neon_fcvtmu, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtms_s32, f32, aarch64_neon_fcvtms, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtms_u32, f32, aarch64_neon_fcvtmu, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtnd_s64, f64, aarch64_neon_fcvtns, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtnd_u64, f64, aarch64_neon_fcvtnu, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtns_s32, f32, aarch64_neon_fcvtns, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtns_u32, f32, aarch64_neon_fcvtnu, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtpd_s64, f64, aarch64_neon_fcvtps, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtpd_u64, f64, aarch64_neon_fcvtpu, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtps_s32, f32, aarch64_neon_fcvtps, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtps_u32, f32, aarch64_neon_fcvtpu, VectorRet | ScalarArg1),
-  SISDMAP1(vcvts_f32, s32, aarch64_neon_vcvtint2fps, ScalarRet | VectorGetArg0),
-  SISDMAP1(vcvts_f32, u32, aarch64_neon_vcvtint2fpu, ScalarRet | VectorGetArg0),
-  SISDMAP1(vcvts_n_f32, s32, aarch64_neon_vcvtfxs2fp_n, ScalarRet | VectorGetArg0),
-  SISDMAP1(vcvts_n_f32, u32, aarch64_neon_vcvtfxu2fp_n, ScalarRet | VectorGetArg0),
-  SISDMAP1(vcvts_n_s32, f32, aarch64_neon_vcvtfp2fxs_n, VectorRet | ScalarArg0),
-  SISDMAP1(vcvts_n_u32, f32, aarch64_neon_vcvtfp2fxu_n, VectorRet | ScalarArg0),
-  SISDMAP1(vcvts_s32, f32, aarch64_neon_fcvtzs, VectorRet | ScalarArg1),
-  SISDMAP1(vcvts_u32, f32, aarch64_neon_fcvtzu, VectorRet | ScalarArg1),
+  SISDMAP1(vcvtad_s64, f64, aarch64_neon_fcvtas, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtad_u64, f64, aarch64_neon_fcvtau, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtas_s32, f32, aarch64_neon_fcvtas, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtas_u32, f32, aarch64_neon_fcvtau, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtd_f64, s64, aarch64_neon_vcvtint2fps, AddRetType | Vectorize1ArgType),
+  SISDMAP1(vcvtd_f64, u64, aarch64_neon_vcvtint2fpu, AddRetType | Vectorize1ArgType),
+  SISDMAP1(vcvtd_n_f64, s64, aarch64_neon_vcvtfxs2fp_n, AddRetType | Vectorize1ArgType),
+  SISDMAP1(vcvtd_n_f64, u64, aarch64_neon_vcvtfxu2fp_n, AddRetType | Vectorize1ArgType),
+  SISDMAP1(vcvtd_n_s64, f64, aarch64_neon_vcvtfp2fxs_n, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtd_n_u64, f64, aarch64_neon_vcvtfp2fxu_n, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtd_s64, f64, aarch64_neon_fcvtzs, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtd_u64, f64, aarch64_neon_fcvtzu, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtmd_s64, f64, aarch64_neon_fcvtms, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtmd_u64, f64, aarch64_neon_fcvtmu, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtms_s32, f32, aarch64_neon_fcvtms, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtms_u32, f32, aarch64_neon_fcvtmu, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtnd_s64, f64, aarch64_neon_fcvtns, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtnd_u64, f64, aarch64_neon_fcvtnu, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtns_s32, f32, aarch64_neon_fcvtns, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtns_u32, f32, aarch64_neon_fcvtnu, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtpd_s64, f64, aarch64_neon_fcvtps, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtpd_u64, f64, aarch64_neon_fcvtpu, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtps_s32, f32, aarch64_neon_fcvtps, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtps_u32, f32, aarch64_neon_fcvtpu, VectorRet | Add1ArgType),
+  SISDMAP1(vcvts_f32, s32, aarch64_neon_vcvtint2fps, AddRetType | Vectorize1ArgType),
+  SISDMAP1(vcvts_f32, u32, aarch64_neon_vcvtint2fpu, AddRetType | Vectorize1ArgType),
+  SISDMAP1(vcvts_n_f32, s32, aarch64_neon_vcvtfxs2fp_n, AddRetType | Vectorize1ArgType),
+  SISDMAP1(vcvts_n_f32, u32, aarch64_neon_vcvtfxu2fp_n, AddRetType | Vectorize1ArgType),
+  SISDMAP1(vcvts_n_s32, f32, aarch64_neon_vcvtfp2fxs_n, VectorRet | Add1ArgType),
+  SISDMAP1(vcvts_n_u32, f32, aarch64_neon_vcvtfp2fxu_n, VectorRet | Add1ArgType),
+  SISDMAP1(vcvts_s32, f32, aarch64_neon_fcvtzs, VectorRet | Add1ArgType),
+  SISDMAP1(vcvts_u32, f32, aarch64_neon_fcvtzu, VectorRet | Add1ArgType),
   SISDMAP1(vcvtxd_f32, f64, aarch64_neon_fcvtxn, 0),
   SISDMAP0(vdupb_lane, i8),
   SISDMAP0(vdupb_laneq, i8),
@@ -2621,59 +2624,59 @@ static const NeonSISDIntrinsicInfo AArch64SISDIntrinsicInfo[] = {
   SISDMAP0(vgetq_lane, i32),
   SISDMAP0(vgetq_lane, i64),
   SISDMAP0(vgetq_lane, i8),
-  SISDMAP1(vmaxnmv, f32, aarch64_neon_vpfmaxnm, ScalarRet | VectorCastArg0),
+  SISDMAP1(vmaxnmv, f32, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
   SISDMAP1(vmaxnmvq, f32, aarch64_neon_vmaxnmv, 0),
-  SISDMAP1(vmaxnmvq, f64, aarch64_neon_vpfmaxnm, ScalarRet | VectorCastArg0),
-  SISDMAP1(vmaxv, f32, aarch64_neon_vpmax, ScalarRet | VectorCastArg0),
-  SISDMAP1(vmaxv, s16, aarch64_neon_smaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxv, s32, aarch64_neon_smaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxv, s8, aarch64_neon_smaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxv, u16, aarch64_neon_umaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxv, u32, aarch64_neon_umaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxv, u8, aarch64_neon_umaxv, VectorRet | VectorCastArg1),
+  SISDMAP1(vmaxnmvq, f64, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
+  SISDMAP1(vmaxv, f32, aarch64_neon_vpmax, AddRetType | Add1ArgType),
+  SISDMAP1(vmaxv, s16, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxv, s32, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxv, s8, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxv, u16, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxv, u32, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxv, u8, aarch64_neon_umaxv, VectorRet | Add1ArgType),
   SISDMAP1(vmaxvq, f32, aarch64_neon_vmaxv, 0),
-  SISDMAP1(vmaxvq, f64, aarch64_neon_vpmax, ScalarRet | VectorCastArg0),
-  SISDMAP1(vmaxvq, s16, aarch64_neon_smaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxvq, s32, aarch64_neon_smaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxvq, s8, aarch64_neon_smaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxvq, u16, aarch64_neon_umaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxvq, u32, aarch64_neon_umaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxvq, u8, aarch64_neon_umaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminnmv, f32, aarch64_neon_vpfminnm, ScalarRet | VectorCastArg0),
+  SISDMAP1(vmaxvq, f64, aarch64_neon_vpmax, AddRetType | Add1ArgType),
+  SISDMAP1(vmaxvq, s16, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxvq, s32, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxvq, s8, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxvq, u16, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxvq, u32, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxvq, u8, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vminnmv, f32, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
   SISDMAP1(vminnmvq, f32, aarch64_neon_vminnmv, 0),
-  SISDMAP1(vminnmvq, f64, aarch64_neon_vpfminnm, ScalarRet | VectorCastArg0),
-  SISDMAP1(vminv, f32, aarch64_neon_vpmin, ScalarRet | VectorCastArg0),
-  SISDMAP1(vminv, s16, aarch64_neon_sminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminv, s32, aarch64_neon_sminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminv, s8, aarch64_neon_sminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminv, u16, aarch64_neon_uminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminv, u32, aarch64_neon_uminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminv, u8, aarch64_neon_uminv, VectorRet | VectorCastArg1),
+  SISDMAP1(vminnmvq, f64, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
+  SISDMAP1(vminv, f32, aarch64_neon_vpmin, AddRetType | Add1ArgType),
+  SISDMAP1(vminv, s16, aarch64_neon_sminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminv, s32, aarch64_neon_sminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminv, s8, aarch64_neon_sminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminv, u16, aarch64_neon_uminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminv, u32, aarch64_neon_uminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminv, u8, aarch64_neon_uminv, VectorRet | Add1ArgType),
   SISDMAP1(vminvq, f32, aarch64_neon_vminv, 0),
-  SISDMAP1(vminvq, f64, aarch64_neon_vpmin, ScalarRet | VectorCastArg0),
-  SISDMAP1(vminvq, s16, aarch64_neon_sminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminvq, s32, aarch64_neon_sminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminvq, s8, aarch64_neon_sminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminvq, u16, aarch64_neon_uminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminvq, u32, aarch64_neon_uminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminvq, u8, aarch64_neon_uminv, VectorRet | VectorCastArg1),
+  SISDMAP1(vminvq, f64, aarch64_neon_vpmin, AddRetType | Add1ArgType),
+  SISDMAP1(vminvq, s16, aarch64_neon_sminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminvq, s32, aarch64_neon_sminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminvq, s8, aarch64_neon_sminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminvq, u16, aarch64_neon_uminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminvq, u32, aarch64_neon_uminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminvq, u8, aarch64_neon_uminv, VectorRet | Add1ArgType),
   SISDMAP0(vmul_n, f64),
   SISDMAP1(vmull, p64, aarch64_neon_vmull_p64, 0),
   SISDMAP0(vmulxd, f64),
   SISDMAP0(vmulxs, f32),
   SISDMAP1(vnegd, s64, aarch64_neon_vneg, 0),
-  SISDMAP1(vpaddd, f64, aarch64_neon_vpfadd, ScalarRet | VectorCastArg0),
+  SISDMAP1(vpaddd, f64, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
   SISDMAP1(vpaddd, s64, aarch64_neon_vpadd, 0),
   SISDMAP1(vpaddd, u64, aarch64_neon_vpadd, 0),
-  SISDMAP1(vpadds, f32, aarch64_neon_vpfadd, ScalarRet | VectorCastArg0),
-  SISDMAP1(vpmaxnmqd, f64, aarch64_neon_vpfmaxnm, ScalarRet | VectorCastArg0),
-  SISDMAP1(vpmaxnms, f32, aarch64_neon_vpfmaxnm, ScalarRet | VectorCastArg0),
-  SISDMAP1(vpmaxqd, f64, aarch64_neon_vpmax, ScalarRet | VectorCastArg0),
-  SISDMAP1(vpmaxs, f32, aarch64_neon_vpmax, ScalarRet | VectorCastArg0),
-  SISDMAP1(vpminnmqd, f64, aarch64_neon_vpfminnm, ScalarRet | VectorCastArg0),
-  SISDMAP1(vpminnms, f32, aarch64_neon_vpfminnm, ScalarRet | VectorCastArg0),
-  SISDMAP1(vpminqd, f64, aarch64_neon_vpmin, ScalarRet | VectorCastArg0),
-  SISDMAP1(vpmins, f32, aarch64_neon_vpmin, ScalarRet | VectorCastArg0),
+  SISDMAP1(vpadds, f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
+  SISDMAP1(vpmaxnmqd, f64, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
+  SISDMAP1(vpmaxnms, f32, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
+  SISDMAP1(vpmaxqd, f64, aarch64_neon_vpmax, AddRetType | Add1ArgType),
+  SISDMAP1(vpmaxs, f32, aarch64_neon_vpmax, AddRetType | Add1ArgType),
+  SISDMAP1(vpminnmqd, f64, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
+  SISDMAP1(vpminnms, f32, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
+  SISDMAP1(vpminqd, f64, aarch64_neon_vpmin, AddRetType | Add1ArgType),
+  SISDMAP1(vpmins, f32, aarch64_neon_vpmin, AddRetType | Add1ArgType),
   SISDMAP1(vqabsb, s8, arm_neon_vqabs, VectorRet),
   SISDMAP1(vqabsd, s64, arm_neon_vqabs, VectorRet),
   SISDMAP1(vqabsh, s16, arm_neon_vqabs, VectorRet),
@@ -2771,20 +2774,20 @@ static const NeonSISDIntrinsicInfo AArch64SISDIntrinsicInfo[] = {
   SISDMAP1(vqsubh, u16, arm_neon_vqsubu, VectorRet),
   SISDMAP1(vqsubs, s32, arm_neon_vqsubs, VectorRet),
   SISDMAP1(vqsubs, u32, arm_neon_vqsubu, VectorRet),
-  SISDMAP1(vrecped, f64, aarch64_neon_vrecpe, ScalarRet),
-  SISDMAP1(vrecpes, f32, aarch64_neon_vrecpe, ScalarRet),
-  SISDMAP1(vrecpsd, f64, aarch64_neon_vrecps, ScalarRet),
-  SISDMAP1(vrecpss, f32, aarch64_neon_vrecps, ScalarRet),
-  SISDMAP1(vrecpxd, f64, aarch64_neon_vrecpx, ScalarRet),
-  SISDMAP1(vrecpxs, f32, aarch64_neon_vrecpx, ScalarRet),
+  SISDMAP1(vrecped, f64, aarch64_neon_vrecpe, AddRetType),
+  SISDMAP1(vrecpes, f32, aarch64_neon_vrecpe, AddRetType),
+  SISDMAP1(vrecpsd, f64, aarch64_neon_vrecps, AddRetType),
+  SISDMAP1(vrecpss, f32, aarch64_neon_vrecps, AddRetType),
+  SISDMAP1(vrecpxd, f64, aarch64_neon_vrecpx, AddRetType),
+  SISDMAP1(vrecpxs, f32, aarch64_neon_vrecpx, AddRetType),
   SISDMAP1(vrshld, s64, aarch64_neon_vrshlds, 0),
   SISDMAP1(vrshld, u64, aarch64_neon_vrshldu, 0),
   SISDMAP1(vrshrd_n, s64, aarch64_neon_vsrshr, VectorRet),
   SISDMAP1(vrshrd_n, u64, aarch64_neon_vurshr, VectorRet),
-  SISDMAP1(vrsqrted, f64, aarch64_neon_vrsqrte, ScalarRet),
-  SISDMAP1(vrsqrtes, f32, aarch64_neon_vrsqrte, ScalarRet),
-  SISDMAP1(vrsqrtsd, f64, aarch64_neon_vrsqrts, ScalarRet),
-  SISDMAP1(vrsqrtss, f32, aarch64_neon_vrsqrts, ScalarRet),
+  SISDMAP1(vrsqrted, f64, aarch64_neon_vrsqrte, AddRetType),
+  SISDMAP1(vrsqrtes, f32, aarch64_neon_vrsqrte, AddRetType),
+  SISDMAP1(vrsqrtsd, f64, aarch64_neon_vrsqrts, AddRetType),
+  SISDMAP1(vrsqrtss, f32, aarch64_neon_vrsqrts, AddRetType),
   SISDMAP1(vrsrad_n, s64, aarch64_neon_vrsrads_n, 0),
   SISDMAP1(vrsrad_n, u64, aarch64_neon_vrsradu_n, 0),
   SISDMAP0(vset_lane, f32),
@@ -2836,6 +2839,36 @@ static const NeonSISDIntrinsicInfo AArch64SISDIntrinsicInfo[] = {
 static bool AArch64SISDIntrinsicInfoProvenSorted = false;
 #endif
 
+Function *CodeGenFunction::LookupNeonIntrinsic(unsigned IntrinsicID,
+                                               unsigned Modifier,
+                                               llvm::Type *ArgType,
+                                               const CallExpr *E) {
+  // Return type.
+  SmallVector<llvm::Type *, 3> Tys;
+  if (Modifier & AddRetType) {
+    llvm::Type *Ty = ConvertType(E->getCallReturnType());
+    if (Modifier & VectorizeRetType)
+      Ty = llvm::VectorType::get(Ty, 1);
+
+    Tys.push_back(Ty);
+  }
+
+  // Arguments.
+  if (Modifier & VectorizeArgTypes)
+    ArgType = llvm::VectorType::get(ArgType, 1);
+
+  if (Modifier & (Add1ArgType | Add2ArgTypes))
+    Tys.push_back(ArgType);
+
+  if (Modifier & Add2ArgTypes)
+    Tys.push_back(ArgType);
+
+  if (Modifier & InventFloatType)
+    Tys.push_back(FloatTy);
+
+  return CGM.getIntrinsic(IntrinsicID, Tys);
+}
+
 static Value *
 EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
                              const NeonSISDIntrinsicInfo &SISDInfo,
@@ -3017,63 +3050,9 @@ EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
     return 0;
 
   // Determine the type(s) of this overloaded AArch64 intrinsic.
-  Function *F = 0;
-  SmallVector<llvm::Type *, 3> Tys;
-
-  // Return type.
-  if (IntTypes & (ScalarRet | VectorRet)) {
-    llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
-    if (IntTypes & ScalarRet) {
-      // Scalar return value.
-      Tys.push_back(Ty);
-    } else if (IntTypes & VectorRet) {
-      // Convert the scalar return type to one-vector element type.
-      Tys.push_back(llvm::VectorType::get(Ty, 1));
-    }
-  }
-
-  // Arguments.
-  if (IntTypes & (ScalarArg0 | VectorGetArg0 | VectorCastArg0)) {
-    const Expr *Arg = E->getArg(0);
-    llvm::Type *Ty = CGF.ConvertType(Arg->getType());
-    if (IntTypes & ScalarArg0) {
-      // Scalar argument.
-      Tys.push_back(Ty);
-    } else if (IntTypes & VectorGetArg0) {
-      // Convert the scalar argument to one-vector element type.
-      Tys.push_back(llvm::VectorType::get(Ty, 1));
-    } else if (IntTypes & VectorCastArg0) {
-      // Cast the argument to vector type.
-      Tys.push_back(cast<llvm::VectorType>(Ty));
-    }
-  }
-
-  // The only intrinsics that require a 2nd argument are the compare intrinsics.
-  // However, the builtins don't always have a 2nd argument (e.g.,
-  // floating-point compare to zero), so we inspect the first argument to
-  // determine the type.
-  if (IntTypes & (ScalarArg1 | VectorGetArg1 | VectorCastArg1)) {
-    const Expr *Arg = E->getArg(0);
-    llvm::Type *Ty = CGF.ConvertType(Arg->getType());
-    if (IntTypes & ScalarArg1) {
-      // Scalar argument.
-      Tys.push_back(Ty);
-    } else if (IntTypes & VectorGetArg1) {
-      // Convert the scalar argument to one-vector element type.
-      Tys.push_back(llvm::VectorType::get(Ty, 1));
-    } else if (IntTypes & VectorCastArg1) {
-      // Cast the argument to a vector type.
-      Tys.push_back(cast<llvm::VectorType>(Ty));
-    }
-  } else if (IntTypes & ScalarFpCmpzArg1) {
-    // Floating-point zero argument.
-    Tys.push_back(CGF.FloatTy);
-  }
-
-  if (IntTypes)
-    F = CGF.CGM.getIntrinsic(Int, Tys);
-  else
-    F = CGF.CGM.getIntrinsic(Int);
+  const Expr *Arg = E->getArg(0);
+  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
+  Function *F = CGF.LookupNeonIntrinsic(Int, IntTypes, ArgTy, E);
 
   Value *Result = CGF.EmitNeonCall(F, Ops, s);
   llvm::Type *ResultType = CGF.ConvertType(E->getType());
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index 185e25ccb2..46eea8f960 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -2182,6 +2182,8 @@ public:
   llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
                                          SmallVectorImpl<llvm::Value *> &Ops,
                                          llvm::Value *Align = 0);
+  llvm::Function *LookupNeonIntrinsic(unsigned IntrinsicID, unsigned Modifier,
+                                      llvm::Type *ArgTy, const CallExpr *E);
   llvm::Value *EmitNeonCall(llvm::Function *F,
                            SmallVectorImpl<llvm::Value *> &O,
                            const char *name,
-- 
2.40.0
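
Editor's note: the sketch below is not part of the commit. It is a
standalone, compilable illustration of how the new modifier bitmask is
meant to drive the overload-type list that LookupNeonIntrinsic passes to
CGM.getIntrinsic. BuildOverloadTypes and its string-based "types" are
hypothetical stand-ins for the real llvm::Type machinery; only the flag
values mirror the enum added by the patch.

#include <iostream>
#include <string>
#include <vector>

// Mirror of the patch's modifier flags.
enum {
  AddRetType        = 1 << 0,  // push the builtin's return type
  Add1ArgType       = 1 << 1,  // push the first argument's type once
  Add2ArgTypes      = 1 << 2,  // push the first argument's type twice
  VectorizeRetType  = 1 << 3,  // wrap the return type in <1 x T>
  VectorizeArgTypes = 1 << 4,  // wrap the argument type in <1 x T>
  InventFloatType   = 1 << 5,  // append float (fp compare-to-zero case)

  VectorRet = AddRetType | VectorizeRetType,
};

// Toy stand-in for the overload-type list: strings instead of llvm::Type*.
static std::vector<std::string> BuildOverloadTypes(unsigned Modifier,
                                                   std::string RetType,
                                                   std::string ArgType) {
  std::vector<std::string> Tys;
  if (Modifier & AddRetType) {
    if (Modifier & VectorizeRetType)
      RetType = "<1 x " + RetType + ">";
    Tys.push_back(RetType);
  }
  if (Modifier & VectorizeArgTypes)
    ArgType = "<1 x " + ArgType + ">";
  if (Modifier & (Add1ArgType | Add2ArgTypes))
    Tys.push_back(ArgType);
  if (Modifier & Add2ArgTypes)  // second copy of the same argument type
    Tys.push_back(ArgType);
  if (Modifier & InventFloatType)
    Tys.push_back("float");
  return Tys;
}

int main() {
  // Loosely modelled on the vaddlv_s16 table entry, which uses
  // VectorRet | Add1ArgType: prints "<1 x i32>" then "<4 x i16>".
  for (const std::string &T :
       BuildOverloadTypes(VectorRet | Add1ArgType, "i32", "<4 x i16>"))
    std::cout << T << "\n";
}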