From: Chad Rosier
Date: Mon, 14 Oct 2013 14:37:40 +0000 (+0000)
Subject: [AArch64] Add support for NEON scalar integer compare instructions.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ad40008ed9e5f4d23972d09386997da5d1a835ee;p=clang

[AArch64] Add support for NEON scalar integer compare instructions.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@192597 91177308-0d34-0410-b5e6-96231b3b80d8
---
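Usage note (illustrative only, mirroring the tests added below; the helper names are hypothetical): the patch exposes scalar 64-bit integer compares through <arm_neon.h>. Each intrinsic returns all-ones when the comparison holds and zero otherwise, and the new tests expect the corresponding scalar compare instruction to be selected:

    #include <arm_neon.h>

    int64_t  is_equal(int64_t a, int64_t b)            { return vceqd_s64(a, b); }  // cmeq  dN, dN, dN
    uint64_t is_higher_or_same(uint64_t a, uint64_t b) { return vcged_u64(a, b); }  // cmhs  dN, dN, dN
    int64_t  is_positive(int64_t a)                    { return vcgtzd_s64(a); }    // cmgt  dN, dN, #0x0
    int64_t  tests_bits(int64_t a, int64_t b)          { return vtstd_s64(a, b); }  // cmtst dN, dN, dN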
diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td
index 71627eea19..bb28dc6777 100644
--- a/include/clang/Basic/arm_neon.td
+++ b/include/clang/Basic/arm_neon.td
@@ -839,4 +839,20 @@ def SCALAR_FRECPX : IInst<"vrecpx", "ss", "SfSd">;
 // Scalar Floating-point Reciprocal Square Root Estimate
 def SCALAR_FRSQRTE : IInst<"vrsqrte", "ss", "SfSd">;
 
+////////////////////////////////////////////////////////////////////////////////
+// Scalar Integer Comparison
+def SCALAR_CMEQ : SInst<"vceq", "sss", "SlSUl">;
+def SCALAR_CMEQZ : SInst<"vceqz", "ss", "SlSUl">;
+def SCALAR_CMGE : SInst<"vcge", "sss", "Sl">;
+def SCALAR_CMGEZ : SInst<"vcgez", "ss", "Sl">;
+def SCALAR_CMHS : SInst<"vcge", "sss", "SUl">;
+def SCALAR_CMLE : SInst<"vcle", "sss", "SlSUl">;
+def SCALAR_CMLEZ : SInst<"vclez", "ss", "Sl">;
+def SCALAR_CMLT : SInst<"vclt", "sss", "SlSUl">;
+def SCALAR_CMLTZ : SInst<"vcltz", "ss", "Sl">;
+def SCALAR_CMGT : SInst<"vcgt", "sss", "Sl">;
+def SCALAR_CMGTZ : SInst<"vcgtz", "ss", "Sl">;
+def SCALAR_CMHI : SInst<"vcgt", "sss", "SUl">;
+def SCALAR_CMTST : SInst<"vtst", "sss", "SlSUl">;
+
 }
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index a1320bb0eb..e5361eeebe 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -1757,6 +1757,11 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
   bool OverloadInt = false;
   const char *s = NULL;
 
+  SmallVector<Value *, 4> Ops;
+  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
+    Ops.push_back(CGF.EmitScalarExpr(E->getArg(i)));
+  }
+
   // AArch64 scalar builtins are not overloaded, they do not have an extra
   // argument that specifies the vector type, need to handle each case.
   switch (BuiltinID) {
@@ -2000,6 +2005,75 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
   case AArch64::BI__builtin_neon_vrsqrted_f64:
     Int = Intrinsic::arm_neon_vrsqrte;
     s = "vrsqrte"; OverloadInt = true; break;
+  // Scalar Compare Equal
+  case AArch64::BI__builtin_neon_vceqd_s64:
+  case AArch64::BI__builtin_neon_vceqd_u64:
+    Int = Intrinsic::aarch64_neon_vceq; s = "vceq";
+    OverloadInt = false; break;
+  // Scalar Compare Equal To Zero
+  case AArch64::BI__builtin_neon_vceqzd_s64:
+  case AArch64::BI__builtin_neon_vceqzd_u64:
+    Int = Intrinsic::aarch64_neon_vceq; s = "vceq";
+    // Add implicit zero operand.
+    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
+    OverloadInt = false; break;
+  // Scalar Compare Greater Than or Equal
+  case AArch64::BI__builtin_neon_vcged_s64:
+    Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
+    OverloadInt = false; break;
+  case AArch64::BI__builtin_neon_vcged_u64:
+    Int = Intrinsic::aarch64_neon_vchs; s = "vcge";
+    OverloadInt = false; break;
+  // Scalar Compare Greater Than or Equal To Zero
+  case AArch64::BI__builtin_neon_vcgezd_s64:
+    Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
+    // Add implicit zero operand.
+    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
+    OverloadInt = false; break;
+  // Scalar Compare Greater Than
+  case AArch64::BI__builtin_neon_vcgtd_s64:
+    Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
+    OverloadInt = false; break;
+  case AArch64::BI__builtin_neon_vcgtd_u64:
+    Int = Intrinsic::aarch64_neon_vchi; s = "vcgt";
+    OverloadInt = false; break;
+  // Scalar Compare Greater Than Zero
+  case AArch64::BI__builtin_neon_vcgtzd_s64:
+    Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
+    // Add implicit zero operand.
+    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
+    OverloadInt = false; break;
+  // Scalar Compare Less Than or Equal
+  case AArch64::BI__builtin_neon_vcled_s64:
+    Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
+    OverloadInt = false; std::swap(Ops[0], Ops[1]); break;
+  case AArch64::BI__builtin_neon_vcled_u64:
+    Int = Intrinsic::aarch64_neon_vchs; s = "vchs";
+    OverloadInt = false; std::swap(Ops[0], Ops[1]); break;
+  // Scalar Compare Less Than or Equal To Zero
+  case AArch64::BI__builtin_neon_vclezd_s64:
+    Int = Intrinsic::aarch64_neon_vclez; s = "vcle";
+    // Add implicit zero operand.
+    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
+    OverloadInt = false; break;
+  // Scalar Compare Less Than
+  case AArch64::BI__builtin_neon_vcltd_s64:
+    Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
+    OverloadInt = false; std::swap(Ops[0], Ops[1]); break;
+  case AArch64::BI__builtin_neon_vcltd_u64:
+    Int = Intrinsic::aarch64_neon_vchi; s = "vchi";
+    OverloadInt = false; std::swap(Ops[0], Ops[1]); break;
+  // Scalar Compare Less Than Zero
+  case AArch64::BI__builtin_neon_vcltzd_s64:
+    Int = Intrinsic::aarch64_neon_vcltz; s = "vclt";
+    // Add implicit zero operand.
+    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
+    OverloadInt = false; break;
+  // Scalar Compare Bitwise Test Bits
+  case AArch64::BI__builtin_neon_vtstd_s64:
+  case AArch64::BI__builtin_neon_vtstd_u64:
+    Int = Intrinsic::aarch64_neon_vtstd; s = "vtst";
+    OverloadInt = false; break;
   }
 
   if (!Int)
@@ -2009,7 +2083,6 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
   // and should be mapped to AArch64 intrinsic that returns
   // one-element vector type.
   Function *F = 0;
-  SmallVector<Value *, 4> Ops;
   if (AcrossVec) {
     // Gen arg type
     const Expr *Arg = E->getArg(E->getNumArgs()-1);
@@ -2038,10 +2111,6 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
   } else
     F = CGF.CGM.getIntrinsic(Int);
 
-  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
-    Ops.push_back(CGF.EmitScalarExpr(E->getArg(i)));
-  }
-
   Value *Result = CGF.EmitNeonCall(F, Ops, s);
   llvm::Type *ResultType = CGF.ConvertType(E->getType());
   // AArch64 intrinsic one-element vector type cast to
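A note on the compare-against-zero builtins handled above (the helper name below is hypothetical): they do not get a dedicated operand path. An explicit constant-zero operand is appended with Constant::getNullValue, and for vceqz/vcgez/vcgtz the ordinary two-operand intrinsic is reused; the tests that follow check that the backend still selects the immediate #0x0 forms.

    // vceqzd_s64(a) is lowered like vceqd_s64(a, 0).
    int64_t is_zero(int64_t a) { return vceqzd_s64(a); }   // cmeq dN, dN, #0x0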
diff --git a/test/CodeGen/aarch64-neon-intrinsics.c b/test/CodeGen/aarch64-neon-intrinsics.c
index e07cdb51b9..e4a97ea7d6 100644
--- a/test/CodeGen/aarch64-neon-intrinsics.c
+++ b/test/CodeGen/aarch64-neon-intrinsics.c
@@ -7009,3 +7009,111 @@ void test_vst4_p16(poly16_t *a, poly16x4x4_t b) {
   // CHECK: st4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h,
   // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
 }
+
+int64_t test_vceqd_s64(int64_t a, int64_t b) {
+// CHECK: test_vceqd_s64
+// CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  return (int64_t)vceqd_s64(a, b);
+}
+
+uint64_t test_vceqd_u64(uint64_t a, uint64_t b) {
+// CHECK: test_vceqd_u64
+// CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  return (int64_t)vceqd_u64(a, b);
+}
+
+int64_t test_vceqzd_s64(int64_t a) {
+// CHECK: test_vceqzd_s64
+// CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, #0x0
+  return (int64_t)vceqzd_s64(a);
+}
+
+int64_t test_vceqzd_u64(int64_t a) {
+// CHECK: test_vceqzd_u64
+// CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, #0x0
+  return (int64_t)vceqzd_u64(a);
+}
+
+int64_t test_vcged_s64(int64_t a, int64_t b) {
+// CHECK: test_vcged_s64
+// CHECK: cmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  return (int64_t)vcged_s64(a, b);
+}
+
+uint64_t test_vcged_u64(uint64_t a, uint64_t b) {
+// CHECK: test_vcged_u64
+// CHECK: cmhs {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  return (uint64_t)vcged_u64(a, b);
+}
+
+int64_t test_vcgezd_s64(int64_t a) {
+// CHECK: test_vcgezd_s64
+// CHECK: cmge {{d[0-9]+}}, {{d[0-9]+}}, #0x0
+  return (int64_t)vcgezd_s64(a);
+}
+
+int64_t test_vcgtd_s64(int64_t a, int64_t b) {
+// CHECK: test_vcgtd_s64
+// CHECK: cmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  return (int64_t)vcgtd_s64(a, b);
+}
+
+uint64_t test_vcgtd_u64(uint64_t a, uint64_t b) {
+// CHECK: test_vcgtd_u64
+// CHECK: cmhi {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  return (uint64_t)vcgtd_u64(a, b);
+}
+
+int64_t test_vcgtzd_s64(int64_t a) {
+// CHECK: test_vcgtzd_s64
+// CHECK: cmgt {{d[0-9]+}}, {{d[0-9]+}}, #0x0
+  return (int64_t)vcgtzd_s64(a);
+}
+
+int64_t test_vcled_s64(int64_t a, int64_t b) {
+// CHECK: test_vcled_s64
+// CHECK: cmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  return (int64_t)vcled_s64(a, b);
+}
+
+uint64_t test_vcled_u64(uint64_t a, uint64_t b) {
+// CHECK: test_vcled_u64
+// CHECK: cmhs {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  return (uint64_t)vcled_u64(a, 0);
+}
+
+int64_t test_vclezd_s64(int64_t a) {
+// CHECK: test_vclezd_s64
+// CHECK: cmle {{d[0-9]+}}, {{d[0-9]+}}, #0x0
+  return (int64_t)vclezd_s64(a);
+}
+
+int64_t test_vcltd_s64(int64_t a, int64_t b) {
+// CHECK: test_vcltd_s64
+// CHECK: cmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  return (int64_t)vcltd_s64(a, b);
+}
+
+uint64_t test_vcltd_u64(uint64_t a, uint64_t b) {
+// CHECK: test_vcltd_u64
+// CHECK: cmhi {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  return (uint64_t)vcltd_u64(a, b);
+}
+
+int64_t test_vcltzd_s64(int64_t a) {
+// CHECK: test_vcltzd_s64
+// CHECK: cmlt {{d[0-9]+}}, {{d[0-9]+}}, #0x0
+  return (int64_t)vcltzd_s64(a);
+}
+
+int64_t test_vtstd_s64(int64_t a, int64_t b) {
+// CHECK: test_vtstd_s64
+// CHECK: cmtst {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  return (int64_t)vtstd_s64(a, b);
+}
+
+uint64_t test_vtstd_u64(uint64_t a, uint64_t b) {
+// CHECK: test_vtstd_u64
+// CHECK: cmtst {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  return (uint64_t)vtstd_u64(a, b);
+}
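One more point, visible in both the CGBuiltin.cpp changes and the CHECK lines above: AArch64 only provides compare-against-zero encodings of CMLE/CMLT, so the register-register less-than intrinsics are lowered by swapping the operands (the std::swap(Ops[0], Ops[1]) calls) and reusing the greater-than/greater-or-equal intrinsics. An illustrative sketch (helper names hypothetical):

    // vcltd_s64(a, b) is emitted as a greater-than compare of (b, a); both select CMGT.
    int64_t lt_intrinsic(int64_t a, int64_t b) { return vcltd_s64(a, b); }  // cmgt dN, dN, dN
    int64_t gt_swapped(int64_t a, int64_t b)   { return vcgtd_s64(b, a); }  // cmgt dN, dN, dN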