From 8137a607eebe799d95fd05226fb91119a5b054b0 Mon Sep 17 00:00:00 2001
From: Kevin Qin
Date: Thu, 14 Nov 2013 02:45:18 +0000
Subject: [PATCH] Implement aarch64 neon instruction class misc.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@194657 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/clang/Basic/arm_neon.td |  85 ++++++++++
 lib/CodeGen/CGBuiltin.cpp       | 278 ++++++++++++++++++++++++++++++++
 lib/CodeGen/CodeGenFunction.h   |   5 +
 utils/TableGen/NeonEmitter.cpp  |  70 +++++++-
 4 files changed, 437 insertions(+), 1 deletion(-)

diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td
index 91cb3eb308..83ed8289fb 100644
--- a/include/clang/Basic/arm_neon.td
+++ b/include/clang/Basic/arm_neon.td
@@ -87,6 +87,12 @@ def OP_SEL : Op;
 def OP_REV64 : Op;
 def OP_REV32 : Op;
 def OP_REV16 : Op;
+def OP_XTN : Op;
+def OP_SQXTUN : Op;
+def OP_QXTN : Op;
+def OP_VCVT_NA_HI : Op;
+def OP_VCVT_EX_HI : Op;
+def OP_VCVTX_HI : Op;
 def OP_REINT : Op;
 def OP_ADDHNHi : Op;
 def OP_RADDHNHi : Op;
@@ -572,12 +578,84 @@ def BSL : SInst<"vbsl", "dudd", "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPsQd
 // With additional Qd type.
 def ABD : SInst<"vabd", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">;
 
+////////////////////////////////////////////////////////////////////////////////
+// saturating absolute/negate
+// With additional Qd/Ql type.
+def ABS : SInst<"vabs", "dd", "csifQcQsQiQfQlQd">;
+def QABS : SInst<"vqabs", "dd", "csiQcQsQiQl">;
+def NEG : SOpInst<"vneg", "dd", "csifQcQsQiQfQdQl", OP_NEG>;
+def QNEG : SInst<"vqneg", "dd", "csiQcQsQiQl">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Signed Saturating Accumulated of Unsigned Value
+def SUQADD : SInst<"vuqadd", "ddd", "csiQcQsQiQl">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Unsigned Saturating Accumulated of Signed Value
+def USQADD : SInst<"vsqadd", "ddd", "UcUsUiQUcQUsQUiQUl">;
+
 ////////////////////////////////////////////////////////////////////////////////
 // Reciprocal/Sqrt
 // With additional Qd type.
 def FRECPS : IInst<"vrecps", "ddd", "fQfQd">;
 def FRSQRTS : IInst<"vrsqrts", "ddd", "fQfQd">;
 
+////////////////////////////////////////////////////////////////////////////////
+// bitwise reverse
+def RBIT : IInst<"vrbit", "dd", "cUcPcQcQUcQPc">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Integer extract and narrow to high
+def XTN2 : SOpInst<"vmovn_high", "qhk", "silUsUiUl", OP_XTN>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Signed integer saturating extract and unsigned narrow to high
+def SQXTUN2 : SOpInst<"vqmovun_high", "qhk", "sil", OP_SQXTUN>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Integer saturating extract and narrow to high
+def QXTN2 : SOpInst<"vqmovn_high", "qhk", "silUsUiUl", OP_QXTN>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Converting vectors
+def VCVT_HIGH_F16 : SOpInst<"vcvt_high_f16", "qhj", "f", OP_VCVT_NA_HI>;
+def VCVT_HIGH_F32_F16 : SOpInst<"vcvt_high_f32", "wk", "h", OP_VCVT_EX_HI>;
+def VCVT_F32_F64 : SInst<"vcvt_f32_f64", "fj", "d">;
+def VCVT_HIGH_F32_F64 : SOpInst<"vcvt_high_f32", "qfj", "d", OP_VCVT_NA_HI>;
+def VCVT_F64_F32 : SInst<"vcvt_f64", "wd", "f">;
+def VCVT_F64 : SInst<"vcvt_f64", "fd", "QlQUl">;
+def VCVT_HIGH_F64_F32 : SOpInst<"vcvt_high_f64", "wj", "f", OP_VCVT_EX_HI>;
+def VCVTX_F32_F64 : SInst<"vcvtx_f32", "fj", "d">;
+def VCVTX_HIGH_F32_F64 : SOpInst<"vcvtx_high_f32", "qfj", "d", OP_VCVTX_HI>;
+def FRINTN : SInst<"vrndn", "dd", "fQfQd">;
+def FRINTA : SInst<"vrnda", "dd", "fQfQd">;
+def FRINTP : SInst<"vrndp", "dd", "fQfQd">;
+def FRINTM : SInst<"vrndm", "dd", "fQfQd">;
+def FRINTX : SInst<"vrndx", "dd", "fQfQd">;
+def FRINTZ : SInst<"vrnd", "dd", "fQfQd">;
+def FRINTI : SInst<"vrndi", "dd", "fQfQd">;
+def VCVT_S64 : SInst<"vcvt_s64", "xd", "Qd">;
+def VCVT_U64 : SInst<"vcvt_u64", "ud", "Qd">;
+def FCVTNS_S32 : SInst<"vcvtn_s32", "xd", "fQf">;
+def FCVTNS_S64 : SInst<"vcvtn_s64", "xd", "Qd">;
+def FCVTNU_S32 : SInst<"vcvtn_u32", "ud", "fQf">;
+def FCVTNU_S64 : SInst<"vcvtn_u64", "ud", "Qd">;
+def FCVTPS_S32 : SInst<"vcvtp_s32", "xd", "fQf">;
+def FCVTPS_S64 : SInst<"vcvtp_s64", "xd", "Qd">;
+def FCVTPU_S32 : SInst<"vcvtp_u32", "ud", "fQf">;
+def FCVTPU_S64 : SInst<"vcvtp_u64", "ud", "Qd">;
+def FCVTMS_S32 : SInst<"vcvtm_s32", "xd", "fQf">;
+def FCVTMS_S64 : SInst<"vcvtm_s64", "xd", "Qd">;
+def FCVTMU_S32 : SInst<"vcvtm_u32", "ud", "fQf">;
+def FCVTMU_S64 : SInst<"vcvtm_u64", "ud", "Qd">;
+def FCVTAS_S32 : SInst<"vcvta_s32", "xd", "fQf">;
+def FCVTAS_S64 : SInst<"vcvta_s64", "xd", "Qd">;
+def FCVTAU_S32 : SInst<"vcvta_u32", "ud", "fQf">;
+def FCVTAU_S64 : SInst<"vcvta_u64", "ud", "Qd">;
+def FRECPE : SInst<"vrecpe", "dd", "fUiQfQUiQd">;
+def FRSQRTE : SInst<"vrsqrte", "dd", "fUiQfQUiQd">;
+def FSQRT : SInst<"vsqrt", "dd", "fQfQd">;
+
 ////////////////////////////////////////////////////////////////////////////////
 // Comparison
 // With additional Qd type.
@@ -594,6 +672,13 @@ def CFMLE : SOpInst<"vcle", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_LE>
 def CFMGT : SOpInst<"vcgt", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_GT>;
 def CFMLT : SOpInst<"vclt", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_LT>;
 
+def CMEQ : SInst<"vceqz", "ud",
+                 "csifUcUsUiPcPsQcQsQiQlQfQUcQUsQUiQUlQPcQPsQd">;
+def CMGE : SInst<"vcgez", "ud", "csifdQcQsQiQlQfQd">;
+def CMLE : SInst<"vclez", "ud", "csifdQcQsQiQlQfQd">;
+def CMGT : SInst<"vcgtz", "ud", "csifdQcQsQiQlQfQd">;
+def CMLT : SInst<"vcltz", "ud", "csifdQcQsQiQlQfQd">;
+
 ////////////////////////////////////////////////////////////////////////////////
 // Max/Min Integer
 // With additional Qd type.
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index d9ec259ca9..b9c6b4df1d 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -2463,6 +2463,21 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
   return CGF.Builder.CreateBitCast(Result, ResultType, s);
 }
 
+Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
+    Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
+    const CmpInst::Predicate Ip, const Twine &Name) {
+  llvm::Type *OTy = ((llvm::User *)Op)->getOperand(0)->getType();
+  if (OTy->isPointerTy())
+    OTy = Ty;
+  Op = Builder.CreateBitCast(Op, OTy);
+  if (((llvm::VectorType *)OTy)->getElementType()->isFloatingPointTy()) {
+    Op = Builder.CreateFCmp(Fp, Op, ConstantAggregateZero::get(OTy));
+  } else {
+    Op = Builder.CreateICmp(Ip, Op, ConstantAggregateZero::get(OTy));
+  }
+  return Builder.CreateZExt(Op, Ty, Name);
+}
+
 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
                                  Value *ExtOp, Value *IndexOp,
                                  llvm::Type *ResTy, unsigned IntID,
@@ -3158,6 +3173,269 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     Int = Intrinsic::aarch64_neon_vmulx;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
   }
+  case AArch64::BI__builtin_neon_vpaddl_v:
+  case AArch64::BI__builtin_neon_vpaddlq_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpaddl_v, E);
+  case AArch64::BI__builtin_neon_vpadal_v:
+  case AArch64::BI__builtin_neon_vpadalq_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpadal_v, E);
+  case AArch64::BI__builtin_neon_vqabs_v:
+  case AArch64::BI__builtin_neon_vqabsq_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqabs_v, E);
+  case AArch64::BI__builtin_neon_vqneg_v:
+  case AArch64::BI__builtin_neon_vqnegq_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqneg_v, E);
+  case AArch64::BI__builtin_neon_vabs_v:
+  case AArch64::BI__builtin_neon_vabsq_v: {
+    if (VTy->getElementType()->isFloatingPointTy()) {
+      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
+    }
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vabs_v, E);
+  }
+  case AArch64::BI__builtin_neon_vsqadd_v:
+  case AArch64::BI__builtin_neon_vsqaddq_v: {
+    Int = Intrinsic::aarch64_neon_usqadd;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
+  }
+  case AArch64::BI__builtin_neon_vuqadd_v:
+  case AArch64::BI__builtin_neon_vuqaddq_v: {
+    Int = Intrinsic::aarch64_neon_suqadd;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
+  }
+  case AArch64::BI__builtin_neon_vcls_v:
+  case AArch64::BI__builtin_neon_vclsq_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcls_v, E);
+  case AArch64::BI__builtin_neon_vclz_v:
+  case AArch64::BI__builtin_neon_vclzq_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vclz_v, E);
+  case AArch64::BI__builtin_neon_vcnt_v:
+  case AArch64::BI__builtin_neon_vcntq_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcnt_v, E);
+  case AArch64::BI__builtin_neon_vrbit_v:
+  case AArch64::BI__builtin_neon_vrbitq_v:
+    Int = Intrinsic::aarch64_neon_rbit;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
+  case AArch64::BI__builtin_neon_vmovn_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmovn_v, E);
+  case AArch64::BI__builtin_neon_vqmovun_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqmovun_v, E);
+  case AArch64::BI__builtin_neon_vqmovn_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqmovn_v, E);
+  case AArch64::BI__builtin_neon_vcvt_f16_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_f16_v, E);
+  case AArch64::BI__builtin_neon_vcvt_f32_f16:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_f32_f16, E);
+  case AArch64::BI__builtin_neon_vcvt_f32_f64: {
+    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, false));
+    return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
+  }
+  case AArch64::BI__builtin_neon_vcvtx_f32_v: {
+    llvm::Type *EltTy = FloatTy;
+    llvm::Type *ResTy = llvm::VectorType::get(EltTy, 2);
+    llvm::Type *Tys[2] = { ResTy, Ty };
+    Int = Intrinsic::aarch64_neon_fcvtxn;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtx_f32_f64");
+  }
+  case AArch64::BI__builtin_neon_vcvt_f64_v: {
+    llvm::Type *OpTy =
+        GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, false));
+    Ops[0] = Builder.CreateBitCast(Ops[0], OpTy);
+    return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
+  }
+  case AArch64::BI__builtin_neon_vcvtq_f64_v: {
+    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
+    return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
+                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
+  }
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqmovun_v, E);
+  case AArch64::BI__builtin_neon_vrndn_v:
+  case AArch64::BI__builtin_neon_vrndnq_v: {
+    Int = Intrinsic::aarch64_neon_frintn;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
+  }
+  case AArch64::BI__builtin_neon_vrnda_v:
+  case AArch64::BI__builtin_neon_vrndaq_v: {
+    Int = Intrinsic::round;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
+  }
+  case AArch64::BI__builtin_neon_vrndp_v:
+  case AArch64::BI__builtin_neon_vrndpq_v: {
+    Int = Intrinsic::ceil;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
+  }
+  case AArch64::BI__builtin_neon_vrndm_v:
+  case AArch64::BI__builtin_neon_vrndmq_v: {
+    Int = Intrinsic::floor;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
+  }
+  case AArch64::BI__builtin_neon_vrndx_v:
+  case AArch64::BI__builtin_neon_vrndxq_v: {
+    Int = Intrinsic::rint;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
+  }
+  case AArch64::BI__builtin_neon_vrnd_v:
+  case AArch64::BI__builtin_neon_vrndq_v: {
+    Int = Intrinsic::trunc;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd");
+  }
+  case AArch64::BI__builtin_neon_vrndi_v:
+  case AArch64::BI__builtin_neon_vrndiq_v: {
+    Int = Intrinsic::nearbyint;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
+  }
+  case AArch64::BI__builtin_neon_vcvt_s32_v:
+  case AArch64::BI__builtin_neon_vcvt_u32_v:
+  case AArch64::BI__builtin_neon_vcvtq_s32_v:
+  case AArch64::BI__builtin_neon_vcvtq_u32_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_u32_v, E);
+  case AArch64::BI__builtin_neon_vcvtq_s64_v:
+  case AArch64::BI__builtin_neon_vcvtq_u64_v: {
+    llvm::Type *DoubleTy =
+        GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
+    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
+    return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
+                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
+  }
+  case AArch64::BI__builtin_neon_vcvtn_s32_v:
+  case AArch64::BI__builtin_neon_vcvtnq_s32_v: {
+    llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements());
+    llvm::Type *Tys[2] = { Ty, OpTy };
+    Int = Intrinsic::aarch64_neon_fcvtns;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtns_f32");
+  }
+  case AArch64::BI__builtin_neon_vcvtnq_s64_v: {
+    llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
+    llvm::Type *Tys[2] = { Ty, OpTy };
+    Int = Intrinsic::aarch64_neon_fcvtns;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtns_f64");
+  }
+  case AArch64::BI__builtin_neon_vcvtn_u32_v:
+  case AArch64::BI__builtin_neon_vcvtnq_u32_v: {
+    llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements());
+    llvm::Type *Tys[2] = { Ty, OpTy };
+    Int = Intrinsic::aarch64_neon_fcvtnu;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtnu_f32");
+  }
+  case AArch64::BI__builtin_neon_vcvtnq_u64_v: {
+    llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
+    llvm::Type *Tys[2] = { Ty, OpTy };
+    Int = Intrinsic::aarch64_neon_fcvtnu;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtnu_f64");
+  }
+  case AArch64::BI__builtin_neon_vcvtp_s32_v:
+  case AArch64::BI__builtin_neon_vcvtpq_s32_v: {
+    llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements());
+    llvm::Type *Tys[2] = { Ty, OpTy };
+    Int = Intrinsic::aarch64_neon_fcvtps;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtps_f32");
+  }
+  case AArch64::BI__builtin_neon_vcvtpq_s64_v: {
+    llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
+    llvm::Type *Tys[2] = { Ty, OpTy };
+    Int = Intrinsic::aarch64_neon_fcvtps;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtps_f64");
+  }
+  case AArch64::BI__builtin_neon_vcvtp_u32_v:
+  case AArch64::BI__builtin_neon_vcvtpq_u32_v: {
+    llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements());
+    llvm::Type *Tys[2] = { Ty, OpTy };
+    Int = Intrinsic::aarch64_neon_fcvtpu;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtpu_f32");
+  }
+  case AArch64::BI__builtin_neon_vcvtpq_u64_v: {
+    llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
+    llvm::Type *Tys[2] = { Ty, OpTy };
+    Int = Intrinsic::aarch64_neon_fcvtpu;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtpu_f64");
+  }
+  case AArch64::BI__builtin_neon_vcvtm_s32_v:
+  case AArch64::BI__builtin_neon_vcvtmq_s32_v: {
+    llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements());
+    llvm::Type *Tys[2] = { Ty, OpTy };
+    Int = Intrinsic::aarch64_neon_fcvtms;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtms_f32");
+  }
+  case AArch64::BI__builtin_neon_vcvtmq_s64_v: {
+    llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
+    llvm::Type *Tys[2] = { Ty, OpTy };
+    Int = Intrinsic::aarch64_neon_fcvtms;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtms_f64");
+  }
+  case AArch64::BI__builtin_neon_vcvtm_u32_v:
+  case AArch64::BI__builtin_neon_vcvtmq_u32_v: {
+    llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements());
+    llvm::Type *Tys[2] = { Ty, OpTy };
+    Int = Intrinsic::aarch64_neon_fcvtmu;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtmu_f32");
+  }
+  case AArch64::BI__builtin_neon_vcvtmq_u64_v: {
+    llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
+    llvm::Type *Tys[2] = { Ty, OpTy };
+    Int = Intrinsic::aarch64_neon_fcvtmu;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtmu_f64");
+  }
+  case AArch64::BI__builtin_neon_vcvta_s32_v:
+  case AArch64::BI__builtin_neon_vcvtaq_s32_v: {
+    llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements());
+    llvm::Type *Tys[2] = { Ty, OpTy };
+    Int = Intrinsic::aarch64_neon_fcvtas;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtas_f32");
+  }
+  case AArch64::BI__builtin_neon_vcvtaq_s64_v: {
+    llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
+    llvm::Type *Tys[2] = { Ty, OpTy };
+    Int = Intrinsic::aarch64_neon_fcvtas;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtas_f64");
+  }
+  case AArch64::BI__builtin_neon_vcvta_u32_v:
+  case AArch64::BI__builtin_neon_vcvtaq_u32_v: {
+    llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements());
+    llvm::Type *Tys[2] = { Ty, OpTy };
+    Int = Intrinsic::aarch64_neon_fcvtau;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtau_f32");
+  }
+  case AArch64::BI__builtin_neon_vcvtaq_u64_v: {
+    llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
+    llvm::Type *Tys[2] = { Ty, OpTy };
+    Int = Intrinsic::aarch64_neon_fcvtau;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtau_f64");
+  }
+  case AArch64::BI__builtin_neon_vrecpe_v:
+  case AArch64::BI__builtin_neon_vrecpeq_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecpe_v, E);
+  case AArch64::BI__builtin_neon_vrsqrte_v:
+  case AArch64::BI__builtin_neon_vrsqrteq_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsqrte_v, E);
+  case AArch64::BI__builtin_neon_vsqrt_v:
+  case AArch64::BI__builtin_neon_vsqrtq_v: {
+    Int = Intrinsic::aarch64_neon_fsqrt;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
+  }
+  case AArch64::BI__builtin_neon_vcvt_f32_v:
+  case AArch64::BI__builtin_neon_vcvtq_f32_v:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_f32_v, E);
+  case AArch64::BI__builtin_neon_vceqz_v:
+  case AArch64::BI__builtin_neon_vceqzq_v:
+    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
+                                         ICmpInst::ICMP_EQ, "vceqz");
+  case AArch64::BI__builtin_neon_vcgez_v:
+  case AArch64::BI__builtin_neon_vcgezq_v:
+    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
+                                         ICmpInst::ICMP_SGE, "vcgez");
+  case AArch64::BI__builtin_neon_vclez_v:
+  case AArch64::BI__builtin_neon_vclezq_v:
+    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
+                                         ICmpInst::ICMP_SLE, "vclez");
+  case AArch64::BI__builtin_neon_vcgtz_v:
+  case AArch64::BI__builtin_neon_vcgtzq_v:
+    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
+                                         ICmpInst::ICMP_SGT, "vcgtz");
+  case AArch64::BI__builtin_neon_vcltz_v:
+  case AArch64::BI__builtin_neon_vcltzq_v:
+    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
+                                         ICmpInst::ICMP_SLT, "vcltz");
   }
 }
 
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index 98c090fa14..3e623eab48 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -2147,6 +2147,11 @@ public:
   /// is unhandled by the current target.
   llvm::Value *EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
 
+  llvm::Value *EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty,
+                                             const llvm::CmpInst::Predicate Fp,
+                                             const llvm::CmpInst::Predicate Ip,
+                                             const llvm::Twine &Name = "");
+  llvm::Value *EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty);
   llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitNeonCall(llvm::Function *F,
diff --git a/utils/TableGen/NeonEmitter.cpp b/utils/TableGen/NeonEmitter.cpp
index 891cdc66f9..2b1de5e43c 100644
--- a/utils/TableGen/NeonEmitter.cpp
+++ b/utils/TableGen/NeonEmitter.cpp
@@ -109,6 +109,12 @@ enum OpKind {
   OpRev16,
   OpRev32,
   OpRev64,
+  OpXtnHi,
+  OpSqxtunHi,
+  OpQxtnHi,
+  OpFcvtnHi,
+  OpFcvtlHi,
+  OpFcvtxnHi,
   OpReinterpret,
   OpAddhnHi,
   OpRAddhnHi,
@@ -262,6 +268,12 @@ public:
     OpMap["OP_REV16"] = OpRev16;
     OpMap["OP_REV32"] = OpRev32;
     OpMap["OP_REV64"] = OpRev64;
+    OpMap["OP_XTN"] = OpXtnHi;
+    OpMap["OP_SQXTUN"] = OpSqxtunHi;
+    OpMap["OP_QXTN"] = OpQxtnHi;
+    OpMap["OP_VCVT_NA_HI"] = OpFcvtnHi;
+    OpMap["OP_VCVT_EX_HI"] = OpFcvtlHi;
+    OpMap["OP_VCVTX_HI"] = OpFcvtxnHi;
     OpMap["OP_REINT"] = OpReinterpret;
     OpMap["OP_ADDHNHi"] = OpAddhnHi;
     OpMap["OP_RADDHNHi"] = OpRAddhnHi;
@@ -372,6 +384,8 @@ static char Widen(const char t) {
     return 'l';
   case 'h':
    return 'f';
+  case 'f':
+    return 'd';
   default:
     PrintFatalError("unhandled type in widen!");
   }
@@ -389,6 +403,8 @@ static char Narrow(const char t) {
     return 'i';
   case 'f':
     return 'h';
+  case 'd':
+    return 'f';
   default:
     PrintFatalError("unhandled type in narrow!");
   }
@@ -858,7 +874,7 @@ static char Insert_BHSD_Suffix(StringRef typestr){
 /// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used.
 static std::string MangleName(const std::string &name, StringRef typestr,
                               ClassKind ck) {
-  if (name == "vcvt_f32_f16")
+  if (name == "vcvt_f32_f16" || name == "vcvt_f32_f64")
     return name;
 
   bool quad = false;
@@ -1797,6 +1813,58 @@ static std::string GenOpString(const std::string &name, OpKind op,
     s += ");";
     break;
   }
+  case OpXtnHi: {
+    s = TypeString(proto[1], typestr) + " __a1 = " +
+        MangleName("vmovn", typestr, ClassS) + "(__b);\n  " +
+        "return __builtin_shufflevector(__a, __a1";
+    for (unsigned i = 0; i < nElts * 4; ++i)
+      s += ", " + utostr(i);
+    s += ");";
+    break;
+  }
+  case OpSqxtunHi: {
+    s = TypeString(proto[1], typestr) + " __a1 = " +
+        MangleName("vqmovun", typestr, ClassS) + "(__b);\n  " +
+        "return __builtin_shufflevector(__a, __a1";
+    for (unsigned i = 0; i < nElts * 4; ++i)
+      s += ", " + utostr(i);
+    s += ");";
+    break;
+  }
+  case OpQxtnHi: {
+    s = TypeString(proto[1], typestr) + " __a1 = " +
+        MangleName("vqmovn", typestr, ClassS) + "(__b);\n  " +
+        "return __builtin_shufflevector(__a, __a1";
+    for (unsigned i = 0; i < nElts * 4; ++i)
+      s += ", " + utostr(i);
+    s += ");";
+    break;
+  }
+  case OpFcvtnHi: {
+    std::string FName = (nElts == 1) ? "vcvt_f32" : "vcvt_f16";
+    s = TypeString(proto[1], typestr) + " __a1 = " +
+        MangleName(FName, typestr, ClassS) + "(__b);\n  " +
+        "return __builtin_shufflevector(__a, __a1";
+    for (unsigned i = 0; i < nElts * 4; ++i)
+      s += ", " + utostr(i);
+    s += ");";
+    break;
+  }
+  case OpFcvtlHi: {
+    std::string FName = (nElts == 2) ? "vcvt_f64" : "vcvt_f32";
+    s = TypeString('d', typestr) + " __a1 = " + GetHigh("__a", typestr) +
+        ";\n  return " + MangleName(FName, typestr, ClassS) + "(__a1);";
+    break;
+  }
+  case OpFcvtxnHi: {
+    s = TypeString(proto[1], typestr) + " __a1 = " +
+        MangleName("vcvtx_f32", typestr, ClassS) + "(__b);\n  " +
+        "return __builtin_shufflevector(__a, __a1";
+    for (unsigned i = 0; i < nElts * 4; ++i)
+      s += ", " + utostr(i);
+    s += ");";
+    break;
+  }
   case OpUzp1:
     s += "__builtin_shufflevector(__a, __b";
     for (unsigned i = 0; i < nElts; i++)
-- 
2.40.0