From ed1f419909b80c588916ce705d0098cbb8063b34 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Tue, 26 Nov 2013 22:17:51 +0000 Subject: [PATCH] [AArch64] Add support for NEON scalar floating-point to integer convert instructions. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@195789 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/arm_neon.td | 24 ++++ lib/CodeGen/CGBuiltin.cpp | 57 +++++++++ test/CodeGen/aarch64-neon-fcvt-intrinsics.c | 133 ++++++++++++++++++++ 3 files changed, 214 insertions(+) create mode 100644 test/CodeGen/aarch64-neon-fcvt-intrinsics.c diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td index b2da84803c..63aec20b58 100644 --- a/include/clang/Basic/arm_neon.td +++ b/include/clang/Basic/arm_neon.td @@ -1122,6 +1122,30 @@ def SCALAR_SCVTFD : SInst<"vcvt_f64", "os", "Sl">; def SCALAR_UCVTFS : SInst<"vcvt_f32", "ys", "SUi">; def SCALAR_UCVTFD : SInst<"vcvt_f64", "os", "SUl">; +//////////////////////////////////////////////////////////////////////////////// +// Scalar Floating-point Converts +def SCALAR_FCVTXN : IInst<"vcvtx_f32", "ys", "Sd">; +def SCALAR_FCVTNSS : SInst<"vcvtn_s32", "$s", "Sf">; +def SCALAR_FCVTNUS : SInst<"vcvtn_u32", "bs", "Sf">; +def SCALAR_FCVTNSD : SInst<"vcvtn_s64", "$s", "Sd">; +def SCALAR_FCVTNUD : SInst<"vcvtn_u64", "bs", "Sd">; +def SCALAR_FCVTMSS : SInst<"vcvtm_s32", "$s", "Sf">; +def SCALAR_FCVTMUS : SInst<"vcvtm_u32", "bs", "Sf">; +def SCALAR_FCVTMSD : SInst<"vcvtm_s64", "$s", "Sd">; +def SCALAR_FCVTMUD : SInst<"vcvtm_u64", "bs", "Sd">; +def SCALAR_FCVTASS : SInst<"vcvta_s32", "$s", "Sf">; +def SCALAR_FCVTAUS : SInst<"vcvta_u32", "bs", "Sf">; +def SCALAR_FCVTASD : SInst<"vcvta_s64", "$s", "Sd">; +def SCALAR_FCVTAUD : SInst<"vcvta_u64", "bs", "Sd">; +def SCALAR_FCVTPSS : SInst<"vcvtp_s32", "$s", "Sf">; +def SCALAR_FCVTPUS : SInst<"vcvtp_u32", "bs", "Sf">; +def SCALAR_FCVTPSD : SInst<"vcvtp_s64", "$s", "Sd">; +def SCALAR_FCVTPUD : SInst<"vcvtp_u64", "bs", "Sd">; +def 
SCALAR_FCVTZSS : SInst<"vcvt_s32", "$s", "Sf">; +def SCALAR_FCVTZUS : SInst<"vcvt_u32", "bs", "Sf">; +def SCALAR_FCVTZSD : SInst<"vcvt_s64", "$s", "Sd">; +def SCALAR_FCVTZUD : SInst<"vcvt_u64", "bs", "Sd">; + //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Reciprocal Estimate def SCALAR_FRECPE : IInst<"vrecpe", "ss", "SfSd">; diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index d496c7ae46..9e135490df 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -1759,6 +1759,7 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF, bool ExtendEle = false; bool OverloadInt = false; bool OverloadCmpInt = false; + bool OverloadCvtInt = false; bool OverloadWideInt = false; bool OverloadNarrowInt = false; const char *s = NULL; @@ -2121,6 +2122,50 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF, case AArch64::BI__builtin_neon_vcvtd_f64_u64: Int = Intrinsic::aarch64_neon_vcvtf64_u64, s = "vcvtf"; OverloadInt = false; break; + // Scalar Floating-point Converts + case AArch64::BI__builtin_neon_vcvtxd_f32_f64: + Int = Intrinsic::aarch64_neon_fcvtxn; + s = "vcvtxn"; OverloadCvtInt = true; break; + case AArch64::BI__builtin_neon_vcvtas_s32_f32: + case AArch64::BI__builtin_neon_vcvtad_s64_f64: + Int = Intrinsic::aarch64_neon_fcvtas; + s = "vcvtas"; OverloadCvtInt = true; break; + case AArch64::BI__builtin_neon_vcvtas_u32_f32: + case AArch64::BI__builtin_neon_vcvtad_u64_f64: + Int = Intrinsic::aarch64_neon_fcvtau; + s = "vcvtau"; OverloadCvtInt = true; break; + case AArch64::BI__builtin_neon_vcvtms_s32_f32: + case AArch64::BI__builtin_neon_vcvtmd_s64_f64: + Int = Intrinsic::aarch64_neon_fcvtms; + s = "vcvtms"; OverloadCvtInt = true; break; + case AArch64::BI__builtin_neon_vcvtms_u32_f32: + case AArch64::BI__builtin_neon_vcvtmd_u64_f64: + Int = Intrinsic::aarch64_neon_fcvtmu; + s = "vcvtmu"; OverloadCvtInt = true; break; + case 
AArch64::BI__builtin_neon_vcvtns_s32_f32: + case AArch64::BI__builtin_neon_vcvtnd_s64_f64: + Int = Intrinsic::aarch64_neon_fcvtns; + s = "vcvtns"; OverloadCvtInt = true; break; + case AArch64::BI__builtin_neon_vcvtns_u32_f32: + case AArch64::BI__builtin_neon_vcvtnd_u64_f64: + Int = Intrinsic::aarch64_neon_fcvtnu; + s = "vcvtnu"; OverloadCvtInt = true; break; + case AArch64::BI__builtin_neon_vcvtps_s32_f32: + case AArch64::BI__builtin_neon_vcvtpd_s64_f64: + Int = Intrinsic::aarch64_neon_fcvtps; + s = "vcvtps"; OverloadCvtInt = true; break; + case AArch64::BI__builtin_neon_vcvtps_u32_f32: + case AArch64::BI__builtin_neon_vcvtpd_u64_f64: + Int = Intrinsic::aarch64_neon_fcvtpu; + s = "vcvtpu"; OverloadCvtInt = true; break; + case AArch64::BI__builtin_neon_vcvts_s32_f32: + case AArch64::BI__builtin_neon_vcvtd_s64_f64: + Int = Intrinsic::aarch64_neon_fcvtzs; + s = "vcvtzs"; OverloadCvtInt = true; break; + case AArch64::BI__builtin_neon_vcvts_u32_f32: + case AArch64::BI__builtin_neon_vcvtd_u64_f64: + Int = Intrinsic::aarch64_neon_fcvtzu; + s = "vcvtzu"; OverloadCvtInt = true; break; // Scalar Floating-point Reciprocal Estimate case AArch64::BI__builtin_neon_vrecpes_f32: case AArch64::BI__builtin_neon_vrecped_f64: @@ -2539,6 +2584,18 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF, Tys.push_back(VTy); Tys.push_back(VTy); + F = CGF.CGM.getIntrinsic(Int, Tys); + } else if (OverloadCvtInt) { + // Determine the types of this overloaded AArch64 intrinsic + SmallVector<llvm::Type *, 2> Tys; + const Expr *Arg = E->getArg(E->getNumArgs()-1); + llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); + llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1); + Tys.push_back(VTy); + Ty = CGF.ConvertType(Arg->getType()); + VTy = llvm::VectorType::get(Ty, 1); + Tys.push_back(VTy); + F = CGF.CGM.getIntrinsic(Int, Tys); } else F = CGF.CGM.getIntrinsic(Int); diff --git a/test/CodeGen/aarch64-neon-fcvt-intrinsics.c b/test/CodeGen/aarch64-neon-fcvt-intrinsics.c new file mode 100644 index 
0000000000..98f1389be4 --- /dev/null +++ b/test/CodeGen/aarch64-neon-fcvt-intrinsics.c @@ -0,0 +1,133 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s + +// Test new aarch64 intrinsics and types + +#include <arm_neon.h> + +float32_t test_vcvtxd_f32_f64(float64_t a) { +// CHECK: test_vcvtxd_f32_f64 +// CHECK: fcvtxn {{s[0-9]+}}, {{d[0-9]+}} + return (float32_t)vcvtxd_f32_f64(a); +} + +int32_t test_vcvtas_s32_f32(float32_t a) { +// CHECK: test_vcvtas_s32_f32 +// CHECK: fcvtas {{s[0-9]+}}, {{s[0-9]+}} + return (int32_t)vcvtas_s32_f32(a); +} + +int64_t test_vcvtad_s64_f64(float64_t a) { +// CHECK: test_vcvtad_s64_f64 +// CHECK: fcvtas {{d[0-9]+}}, {{d[0-9]+}} + return (int64_t)vcvtad_s64_f64(a); +} + +uint32_t test_vcvtas_u32_f32(float32_t a) { +// CHECK: test_vcvtas_u32_f32 +// CHECK: fcvtau {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vcvtas_u32_f32(a); +} + +uint64_t test_vcvtad_u64_f64(float64_t a) { +// CHECK: test_vcvtad_u64_f64 +// CHECK: fcvtau {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcvtad_u64_f64(a); +} + +int32_t test_vcvtms_s32_f32(float32_t a) { +// CHECK: test_vcvtms_s32_f32 +// CHECK: fcvtms {{s[0-9]+}}, {{s[0-9]+}} + return (int32_t)vcvtms_s32_f32(a); +} + +int64_t test_vcvtmd_s64_f64(float64_t a) { +// CHECK: test_vcvtmd_s64_f64 +// CHECK: fcvtms {{d[0-9]+}}, {{d[0-9]+}} + return (int64_t)vcvtmd_s64_f64(a); +} + +uint32_t test_vcvtms_u32_f32(float32_t a) { +// CHECK: test_vcvtms_u32_f32 +// CHECK: fcvtmu {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vcvtms_u32_f32(a); +} + +uint64_t test_vcvtmd_u64_f64(float64_t a) { +// CHECK: test_vcvtmd_u64_f64 +// CHECK: fcvtmu {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcvtmd_u64_f64(a); +} + +int32_t test_vcvtns_s32_f32(float32_t a) { +// CHECK: test_vcvtns_s32_f32 +// CHECK: fcvtns {{s[0-9]+}}, {{s[0-9]+}} + return (int32_t)vcvtns_s32_f32(a); +} + +int64_t test_vcvtnd_s64_f64(float64_t a) 
{ +// CHECK: test_vcvtnd_s64_f64 +// CHECK: fcvtns {{d[0-9]+}}, {{d[0-9]+}} + return (int64_t)vcvtnd_s64_f64(a); +} + +uint32_t test_vcvtns_u32_f32(float32_t a) { +// CHECK: test_vcvtns_u32_f32 +// CHECK: fcvtnu {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vcvtns_u32_f32(a); +} + +uint64_t test_vcvtnd_u64_f64(float64_t a) { +// CHECK: test_vcvtnd_u64_f64 +// CHECK: fcvtnu {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcvtnd_u64_f64(a); +} + +int32_t test_vcvtps_s32_f32(float32_t a) { +// CHECK: test_vcvtps_s32_f32 +// CHECK: fcvtps {{s[0-9]+}}, {{s[0-9]+}} + return (int32_t)vcvtps_s32_f32(a); +} + +int64_t test_vcvtpd_s64_f64(float64_t a) { +// CHECK: test_vcvtpd_s64_f64 +// CHECK: fcvtps {{d[0-9]+}}, {{d[0-9]+}} + return (int64_t)vcvtpd_s64_f64(a); +} + +uint32_t test_vcvtps_u32_f32(float32_t a) { +// CHECK: test_vcvtps_u32_f32 +// CHECK: fcvtpu {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vcvtps_u32_f32(a); +} + +uint64_t test_vcvtpd_u64_f64(float64_t a) { +// CHECK: test_vcvtpd_u64_f64 +// CHECK: fcvtpu {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcvtpd_u64_f64(a); +} + +int32_t test_vcvts_s32_f32(float32_t a) { +// CHECK: test_vcvts_s32_f32 +// CHECK: fcvtzs {{s[0-9]+}}, {{s[0-9]+}} + return (int32_t)vcvts_s32_f32(a); +} + +int64_t test_vcvtd_s64_f64(float64_t a) { +// CHECK: test_vcvtd_s64_f64 +// CHECK: fcvtzs {{d[0-9]+}}, {{d[0-9]+}} + return (int64_t)vcvtd_s64_f64(a); +} + +uint32_t test_vcvts_u32_f32(float32_t a) { +// CHECK: test_vcvts_u32_f32 +// CHECK: fcvtzu {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vcvts_u32_f32(a); +} + +uint64_t test_vcvtd_u64_f64(float64_t a) { +// CHECK: test_vcvtd_u64_f64 +// CHECK: fcvtzu {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcvtd_u64_f64(a); +} -- 2.40.0