From 564020954e9eb01293d90802c89a02f87301e095 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Wed, 30 Oct 2013 15:20:07 +0000 Subject: [PATCH] [AArch64] Add support for NEON scalar floating-point compare instructions. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@193692 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/arm_neon.td | 24 ++++ lib/CodeGen/CGBuiltin.cpp | 124 +++++++++++++++--- test/CodeGen/aarch64-neon-intrinsics.c | 169 +++++++++++++++++++++++++ utils/TableGen/NeonEmitter.cpp | 2 + 4 files changed, 304 insertions(+), 15 deletions(-) diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td index f370bd7de1..14bcee31a0 100644 --- a/include/clang/Basic/arm_neon.td +++ b/include/clang/Basic/arm_neon.td @@ -169,6 +169,7 @@ class NoTestOpInst : Inst {} // z: scalar of half width element type, signed // r: scalar of double width element type, signed // a: scalar of element type (splat to vector type) +// b: scalar of unsigned integer/long type (int/float args) // y: scalar of float // o: scalar of double // k: default elt width, double num elts @@ -856,6 +857,29 @@ def SCALAR_CMGTZ : SInst<"vcgtz", "ss", "Sl">; def SCALAR_CMHI : SInst<"vcgt", "sss", "SUl">; def SCALAR_CMTST : SInst<"vtst", "sss", "SlSUl">; +//////////////////////////////////////////////////////////////////////////////// +// Scalar Floating-point Comparison +def SCALAR_FCMEQ : IInst<"vceq", "bss", "SfSd">; +def SCALAR_FCMEQZ : IInst<"vceqz", "bs", "SfSd">; +def SCALAR_FCMGE : IInst<"vcge", "bss", "SfSd">; +def SCALAR_FCMGEZ : IInst<"vcgez", "bs", "SfSd">; +def SCALAR_FCMGT : IInst<"vcgt", "bss", "SfSd">; +def SCALAR_FCMGTZ : IInst<"vcgtz", "bs", "SfSd">; +def SCALAR_FCMLE : IInst<"vcle", "bss", "SfSd">; +def SCALAR_FCMLEZ : IInst<"vclez", "bs", "SfSd">; +def SCALAR_FCMLT : IInst<"vclt", "bss", "SfSd">; +def SCALAR_FCMLTZ : IInst<"vcltz", "bs", "SfSd">; + +//////////////////////////////////////////////////////////////////////////////// +// Scalar 
Floating-point Absolute Compare Mask Greater Than Or Equal +def SCALAR_FACGE : IInst<"vcage", "bss", "SfSd">; +def SCALAR_FACLE : IInst<"vcale", "bss", "SfSd">; + +//////////////////////////////////////////////////////////////////////////////// +// Scalar Floating-point Absolute Compare Mask Greater Than +def SCALAR_FACGT : IInst<"vcagt", "bss", "SfSd">; +def SCALAR_FACLT : IInst<"vcalt", "bss", "SfSd">; + //////////////////////////////////////////////////////////////////////////////// // Scalar Absolute Value def SCALAR_ABS : SInst<"vabs", "ss", "Sl">; diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 8c82eb2d91..75686ceb86 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -1755,6 +1755,7 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF, // Extend element of one-element vector bool ExtendEle = false; bool OverloadInt = false; + bool OverloadCmpInt = false; bool OverloadWideInt = false; bool OverloadNarrowInt = false; const char *s = NULL; @@ -2011,71 +2012,151 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF, case AArch64::BI__builtin_neon_vceqd_s64: case AArch64::BI__builtin_neon_vceqd_u64: Int = Intrinsic::aarch64_neon_vceq; s = "vceq"; - OverloadInt = false; break; + OverloadCmpInt = true; break; // Scalar Compare Equal To Zero case AArch64::BI__builtin_neon_vceqzd_s64: case AArch64::BI__builtin_neon_vceqzd_u64: Int = Intrinsic::aarch64_neon_vceq; s = "vceq"; // Add implicit zero operand. 
Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); - OverloadInt = false; break; + OverloadCmpInt = true; break; // Scalar Compare Greater Than or Equal case AArch64::BI__builtin_neon_vcged_s64: Int = Intrinsic::aarch64_neon_vcge; s = "vcge"; - OverloadInt = false; break; + OverloadCmpInt = true; break; case AArch64::BI__builtin_neon_vcged_u64: Int = Intrinsic::aarch64_neon_vchs; s = "vcge"; - OverloadInt = false; break; + OverloadCmpInt = true; break; // Scalar Compare Greater Than or Equal To Zero case AArch64::BI__builtin_neon_vcgezd_s64: Int = Intrinsic::aarch64_neon_vcge; s = "vcge"; // Add implicit zero operand. Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); - OverloadInt = false; break; + OverloadCmpInt = true; break; // Scalar Compare Greater Than case AArch64::BI__builtin_neon_vcgtd_s64: Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt"; - OverloadInt = false; break; + OverloadCmpInt = true; break; case AArch64::BI__builtin_neon_vcgtd_u64: Int = Intrinsic::aarch64_neon_vchi; s = "vcgt"; - OverloadInt = false; break; + OverloadCmpInt = true; break; // Scalar Compare Greater Than Zero case AArch64::BI__builtin_neon_vcgtzd_s64: Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt"; // Add implicit zero operand. Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); - OverloadInt = false; break; + OverloadCmpInt = true; break; // Scalar Compare Less Than or Equal case AArch64::BI__builtin_neon_vcled_s64: Int = Intrinsic::aarch64_neon_vcge; s = "vcge"; - OverloadInt = false; std::swap(Ops[0], Ops[1]); break; + OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break; case AArch64::BI__builtin_neon_vcled_u64: Int = Intrinsic::aarch64_neon_vchs; s = "vchs"; - OverloadInt = false; std::swap(Ops[0], Ops[1]); break; + OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break; // Scalar Compare Less Than or Equal To Zero case AArch64::BI__builtin_neon_vclezd_s64: Int = Intrinsic::aarch64_neon_vclez; s = "vcle"; // Add implicit zero operand. 
Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); - OverloadInt = false; break; + OverloadCmpInt = true; break; // Scalar Compare Less Than case AArch64::BI__builtin_neon_vcltd_s64: Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt"; - OverloadInt = false; std::swap(Ops[0], Ops[1]); break; + OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break; case AArch64::BI__builtin_neon_vcltd_u64: Int = Intrinsic::aarch64_neon_vchi; s = "vchi"; - OverloadInt = false; std::swap(Ops[0], Ops[1]); break; + OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break; // Scalar Compare Less Than Zero case AArch64::BI__builtin_neon_vcltzd_s64: Int = Intrinsic::aarch64_neon_vcltz; s = "vclt"; // Add implicit zero operand. Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); - OverloadInt = false; break; + OverloadCmpInt = true; break; + // Scalar Floating-point Compare Equal + case AArch64::BI__builtin_neon_vceqs_f32: + case AArch64::BI__builtin_neon_vceqd_f64: + Int = Intrinsic::aarch64_neon_vceq; s = "vceq"; + OverloadCmpInt = true; break; + // Scalar Floating-point Compare Equal To Zero + case AArch64::BI__builtin_neon_vceqzs_f32: + case AArch64::BI__builtin_neon_vceqzd_f64: + Int = Intrinsic::aarch64_neon_vceq; s = "vceq"; + // Add implicit zero operand. + Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); + OverloadCmpInt = true; break; + // Scalar Floating-point Compare Greater Than Or Equal + case AArch64::BI__builtin_neon_vcges_f32: + case AArch64::BI__builtin_neon_vcged_f64: + Int = Intrinsic::aarch64_neon_vcge; s = "vcge"; + OverloadCmpInt = true; break; + // Scalar Floating-point Compare Greater Than Or Equal To Zero + case AArch64::BI__builtin_neon_vcgezs_f32: + case AArch64::BI__builtin_neon_vcgezd_f64: + Int = Intrinsic::aarch64_neon_vcge; s = "vcge"; + // Add implicit zero operand. 
+ Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); + OverloadCmpInt = true; break; + // Scalar Floating-point Compare Greater Than + case AArch64::BI__builtin_neon_vcgts_f32: + case AArch64::BI__builtin_neon_vcgtd_f64: + Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt"; + OverloadCmpInt = true; break; + // Scalar Floating-point Compare Greater Than Zero + case AArch64::BI__builtin_neon_vcgtzs_f32: + case AArch64::BI__builtin_neon_vcgtzd_f64: + Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt"; + // Add implicit zero operand. + Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); + OverloadCmpInt = true; break; + // Scalar Floating-point Compare Less Than or Equal + case AArch64::BI__builtin_neon_vcles_f32: + case AArch64::BI__builtin_neon_vcled_f64: + Int = Intrinsic::aarch64_neon_vcge; s = "vcge"; + OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break; + // Scalar Floating-point Compare Less Than Or Equal To Zero + case AArch64::BI__builtin_neon_vclezs_f32: + case AArch64::BI__builtin_neon_vclezd_f64: + Int = Intrinsic::aarch64_neon_vclez; s = "vcle"; + // Add implicit zero operand. + Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); + OverloadCmpInt = true; break; + // Scalar Floating-point Compare Less Than + case AArch64::BI__builtin_neon_vclts_f32: + case AArch64::BI__builtin_neon_vcltd_f64: + Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt"; + OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break; + // Scalar Floating-point Compare Less Than Zero + case AArch64::BI__builtin_neon_vcltzs_f32: + case AArch64::BI__builtin_neon_vcltzd_f64: + Int = Intrinsic::aarch64_neon_vcltz; s = "vclt"; + // Add implicit zero operand. 
+ Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); + OverloadCmpInt = true; break; + // Scalar Floating-point Absolute Compare Greater Than Or Equal + case AArch64::BI__builtin_neon_vcages_f32: + case AArch64::BI__builtin_neon_vcaged_f64: + Int = Intrinsic::aarch64_neon_vcage; s = "vcage"; + OverloadCmpInt = true; break; + // Scalar Floating-point Absolute Compare Greater Than + case AArch64::BI__builtin_neon_vcagts_f32: + case AArch64::BI__builtin_neon_vcagtd_f64: + Int = Intrinsic::aarch64_neon_vcagt; s = "vcagt"; + OverloadCmpInt = true; break; + // Scalar Floating-point Absolute Compare Less Than Or Equal + case AArch64::BI__builtin_neon_vcales_f32: + case AArch64::BI__builtin_neon_vcaled_f64: + Int = Intrinsic::aarch64_neon_vcage; s = "vcage"; + OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break; + // Scalar Floating-point Absolute Compare Less Than + case AArch64::BI__builtin_neon_vcalts_f32: + case AArch64::BI__builtin_neon_vcaltd_f64: + Int = Intrinsic::aarch64_neon_vcagt; s = "vcalt"; + OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break; // Scalar Compare Bitwise Test Bits case AArch64::BI__builtin_neon_vtstd_s64: case AArch64::BI__builtin_neon_vtstd_u64: Int = Intrinsic::aarch64_neon_vtstd; s = "vtst"; - OverloadInt = false; break; + OverloadCmpInt = true; break; // Scalar Absolute Value case AArch64::BI__builtin_neon_vabsd_s64: Int = Intrinsic::aarch64_neon_vabs; @@ -2187,6 +2268,19 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF, llvm::VectorType::getExtendedElementVectorType(VTy) : llvm::VectorType::getTruncatedElementVectorType(VTy); F = CGF.CGM.getIntrinsic(Int, RTy); + } else if (OverloadCmpInt) { + // Determine the types of this overloaded AArch64 intrinsic + SmallVector<llvm::Type *, 3> Tys; + const Expr *Arg = E->getArg(E->getNumArgs()-1); + llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); + llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1); + Tys.push_back(VTy); + Ty = CGF.ConvertType(Arg->getType()); + VTy 
= llvm::VectorType::get(Ty, 1); + Tys.push_back(VTy); + Tys.push_back(VTy); + + F = CGF.CGM.getIntrinsic(Int, Tys); } else F = CGF.CGM.getIntrinsic(Int); diff --git a/test/CodeGen/aarch64-neon-intrinsics.c b/test/CodeGen/aarch64-neon-intrinsics.c index 1696b49d9b..e4496c381f 100644 --- a/test/CodeGen/aarch64-neon-intrinsics.c +++ b/test/CodeGen/aarch64-neon-intrinsics.c @@ -7315,3 +7315,172 @@ int32_t test_vqmovnd_u64(int64_t a) { // CHECK: uqxtn {{s[0-9]+}}, {{d[0-9]+}} return (int32_t)vqmovnd_u64(a); } + +uint32_t test_vceqs_f32(float32_t a, float32_t b) { +// CHECK: test_vceqs_f32 +// CHECK: fcmeq {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vceqs_f32(a, b); +} + +uint64_t test_vceqd_f64(float64_t a, float64_t b) { +// CHECK: test_vceqd_f64 +// CHECK: fcmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vceqd_f64(a, b); +} + +uint32_t test_vceqzs_f32(float32_t a) { +// CHECK: test_vceqzs_f32 +// CHECK: fcmeq {{s[0-9]+}}, {{s[0-9]+}}, #0.0 + return (uint32_t)vceqzs_f32(a); +} + +uint64_t test_vceqzd_f64(float64_t a) { +// CHECK: test_vceqzd_f64 +// CHECK: fcmeq {{d[0-9]+}}, {{d[0-9]+}}, #0.0 + return (uint64_t)vceqzd_f64(a); +} + +uint32_t test_vcges_f32(float32_t a, float32_t b) { +// CHECK: test_vcges_f32 +// CHECK: fcmge {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vcges_f32(a, b); +} + +uint64_t test_vcged_f64(float64_t a, float64_t b) { +// CHECK: test_vcged_f64 +// CHECK: fcmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcged_f64(a, b); +} + +uint32_t test_vcgezs_f32(float32_t a) { +// CHECK: test_vcgezs_f32 +// CHECK: fcmge {{s[0-9]+}}, {{s[0-9]+}}, #0.0 + return (uint32_t)vcgezs_f32(a); +} + +uint64_t test_vcgezd_f64(float64_t a) { +// CHECK: test_vcgezd_f64 +// CHECK: fcmge {{d[0-9]+}}, {{d[0-9]+}}, #0.0 + return (uint64_t)vcgezd_f64(a); +} + +uint32_t test_vcgts_f32(float32_t a, float32_t b) { +// CHECK: test_vcgts_f32 +// CHECK: fcmgt {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vcgts_f32(a, 
b); +} + +uint64_t test_vcgtd_f64(float64_t a, float64_t b) { +// CHECK: test_vcgtd_f64 +// CHECK: fcmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcgtd_f64(a, b); +} + +uint32_t test_vcgtzs_f32(float32_t a) { +// CHECK: test_vcgtzs_f32 +// CHECK: fcmgt {{s[0-9]+}}, {{s[0-9]+}}, #0.0 + return (uint32_t)vcgtzs_f32(a); +} + +uint64_t test_vcgtzd_f64(float64_t a) { +// CHECK: test_vcgtzd_f64 +// CHECK: fcmgt {{d[0-9]+}}, {{d[0-9]+}}, #0.0 + return (uint64_t)vcgtzd_f64(a); +} + +uint32_t test_vcles_f32(float32_t a, float32_t b) { +// CHECK: test_vcles_f32 +// CHECK: fcmge {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vcles_f32(a, b); +} + +uint64_t test_vcled_f64(float64_t a, float64_t b) { +// CHECK: test_vcled_f64 +// CHECK: fcmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcled_f64(a, b); +} + +uint32_t test_vclezs_f32(float32_t a) { +// CHECK: test_vclezs_f32 +// CHECK: fcmle {{s[0-9]+}}, {{s[0-9]+}}, #0.0 + return (uint32_t)vclezs_f32(a); +} + +uint64_t test_vclezd_f64(float64_t a) { +// CHECK: test_vclezd_f64 +// CHECK: fcmle {{d[0-9]+}}, {{d[0-9]+}}, #0.0 + return (uint64_t)vclezd_f64(a); +} + +uint32_t test_vclts_f32(float32_t a, float32_t b) { +// CHECK: test_vclts_f32 +// CHECK: fcmgt {{s[0-9]+}}, s1, s0 + return (uint32_t)vclts_f32(a, b); +} + +uint64_t test_vcltd_f64(float64_t a, float64_t b) { +// CHECK: test_vcltd_f64 +// CHECK: fcmgt {{d[0-9]+}}, d1, d0 + return (uint64_t)vcltd_f64(a, b); +} + +uint32_t test_vcltzs_f32(float32_t a) { +// CHECK: test_vcltzs_f32 +// CHECK: fcmlt {{s[0-9]+}}, {{s[0-9]+}}, #0.0 + return (uint32_t)vcltzs_f32(a); +} + +uint64_t test_vcltzd_f64(float64_t a) { +// CHECK: test_vcltzd_f64 +// CHECK: fcmlt {{d[0-9]+}}, {{d[0-9]+}}, #0.0 + return (uint64_t)vcltzd_f64(a); +} + +uint32_t test_vcages_f32(float32_t a, float32_t b) { +// CHECK: test_vcages_f32 +// CHECK: facge {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vcages_f32(a, b); +} + +uint64_t test_vcaged_f64(float64_t a, 
float64_t b) { +// CHECK: test_vcaged_f64 +// CHECK: facge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcaged_f64(a, b); +} + +uint32_t test_vcagts_f32(float32_t a, float32_t b) { +// CHECK: test_vcagts_f32 +// CHECK: facgt {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vcagts_f32(a, b); +} + +uint64_t test_vcagtd_f64(float64_t a, float64_t b) { +// CHECK: test_vcagtd_f64 +// CHECK: facgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcagtd_f64(a, b); +} + +uint32_t test_vcales_f32(float32_t a, float32_t b) { +// CHECK: test_vcales_f32 +// CHECK: facge {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vcales_f32(a, b); +} + +uint64_t test_vcaled_f64(float64_t a, float64_t b) { +// CHECK: test_vcaled_f64 +// CHECK: facge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcaled_f64(a, b); +} + +uint32_t test_vcalts_f32(float32_t a, float32_t b) { +// CHECK: test_vcalts_f32 +// CHECK: facgt {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vcalts_f32(a, b); +} + +uint64_t test_vcaltd_f64(float64_t a, float64_t b) { +// CHECK: test_vcaltd_f64 +// CHECK: facgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcaltd_f64(a, b); +} + diff --git a/utils/TableGen/NeonEmitter.cpp b/utils/TableGen/NeonEmitter.cpp index ca9f35b0bd..8e60c29638 100644 --- a/utils/TableGen/NeonEmitter.cpp +++ b/utils/TableGen/NeonEmitter.cpp @@ -442,6 +442,8 @@ static char ModType(const char mod, char type, bool &quad, bool &poly, usgn = true; } break; + case 'b': + scal = true; case 'u': usgn = true; poly = false; -- 2.40.0