From: Chad Rosier
Date: Thu, 17 Oct 2013 18:12:50 +0000 (+0000)
Subject: [AArch64] Add support for NEON scalar three register different instruction
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d867422c86914e055b8772ec1f22b349e15de94b;p=clang

[AArch64] Add support for NEON scalar three register different instruction
class. The instruction class includes the signed saturating doubling
multiply-add long, signed saturating doubling multiply-subtract long, and
the signed saturating doubling multiply long instructions.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@192909 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td
index 56b7e9c15b..560ed9f01f 100644
--- a/include/clang/Basic/arm_neon.td
+++ b/include/clang/Basic/arm_neon.td
@@ -878,4 +878,16 @@ def SCALAR_SUQADD : SInst<"vuqadd", "sss", "ScSsSiSl">;
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Unsigned Saturating Accumulated of Signed Value
 def SCALAR_USQADD : SInst<"vsqadd", "sss", "SUcSUsSUiSUl">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Signed Saturating Doubling Multiply-Add Long
+def SCALAR_SQDMLAL : SInst<"vqdmlal", "rss", "SsSi">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Signed Saturating Doubling Multiply-Subtract Long
+def SCALAR_SQDMLSL : SInst<"vqdmlsl", "rss", "SsSi">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Signed Saturating Doubling Multiply Long
+def SCALAR_SQDMULL : SInst<"vqdmull", "rss", "SsSi">;
 }
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 36829391e5..0802b3a3f6 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -1755,6 +1755,7 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
   // Extend element of one-element vector
   bool ExtendEle = false;
   bool OverloadInt = false;
+  bool OverloadWideInt = false;
   const char *s = NULL;
 
   SmallVector Ops;
@@ -2110,6 +2111,21 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
   case AArch64::BI__builtin_neon_vsqaddd_u64:
     Int = Intrinsic::aarch64_neon_vsqadd;
     s = "vsqadd"; OverloadInt = true; break;
+  // Signed Saturating Doubling Multiply-Add Long
+  case AArch64::BI__builtin_neon_vqdmlalh_s16:
+  case AArch64::BI__builtin_neon_vqdmlals_s32:
+    Int = Intrinsic::aarch64_neon_vqdmlal;
+    s = "vqdmlal"; OverloadWideInt = true; break;
+  // Signed Saturating Doubling Multiply-Subtract Long
+  case AArch64::BI__builtin_neon_vqdmlslh_s16:
+  case AArch64::BI__builtin_neon_vqdmlsls_s32:
+    Int = Intrinsic::aarch64_neon_vqdmlsl;
+    s = "vqdmlsl"; OverloadWideInt = true; break;
+  // Signed Saturating Doubling Multiply Long
+  case AArch64::BI__builtin_neon_vqdmullh_s16:
+  case AArch64::BI__builtin_neon_vqdmulls_s32:
+    Int = Intrinsic::aarch64_neon_vqdmull;
+    s = "vqdmull"; OverloadWideInt = true; break;
   }
 
   if (!Int)
@@ -2135,8 +2151,7 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
     llvm::Type *Tys[2] = {RTy, VTy};
     F = CGF.CGM.getIntrinsic(Int, Tys);
     assert(E->getNumArgs() == 1);
-  }
-  else if (OverloadInt) {
+  } else if (OverloadInt) {
     // Determine the type of this overloaded AArch64 intrinsic
     const Expr *Arg = E->getArg(E->getNumArgs()-1);
     llvm::Type *Ty = CGF.ConvertType(Arg->getType());
@@ -2144,6 +2159,13 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
     assert(VTy);
 
     F = CGF.CGM.getIntrinsic(Int, VTy);
+  } else if (OverloadWideInt) {
+    // Determine the type of this overloaded AArch64 intrinsic
+    const Expr *Arg = E->getArg(E->getNumArgs()-1);
+    llvm::Type *Ty = CGF.ConvertType(Arg->getType());
+    llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1);
+    llvm::VectorType *RTy = llvm::VectorType::getExtendedElementVectorType(VTy);
+    F = CGF.CGM.getIntrinsic(Int, RTy);
   } else
     F = CGF.CGM.getIntrinsic(Int);
 
diff --git a/test/CodeGen/aarch64-neon-intrinsics.c b/test/CodeGen/aarch64-neon-intrinsics.c
index d3fd0cd585..ffee567c89 100644
--- a/test/CodeGen/aarch64-neon-intrinsics.c
+++ b/test/CodeGen/aarch64-neon-intrinsics.c
@@ -7225,3 +7225,39 @@ uint64_t test_vsqaddd_u64(uint64_t a, uint64_t b) {
 // CHECK: usqadd {{d[0-9]+}}, {{d[0-9]+}}
   return (uint64_t)vsqaddd_u64(a, b);
 }
+
+int32_t test_vqdmlalh_s16(int16_t a, int16_t b) {
+// CHECK: test_vqdmlalh_s16
+// CHECK: sqdmlal {{s[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+  return (int32_t)vqdmlalh_s16(a, b);
+}
+
+int64_t test_vqdmlals_s32(int32_t a, int32_t b) {
+// CHECK: test_vqdmlals_s32
+// CHECK: sqdmlal {{d[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  return (int64_t)vqdmlals_s32(a, b);
+}
+
+int32_t test_vqdmlslh_s16(int16_t a, int16_t b) {
+// CHECK: test_vqdmlslh_s16
+// CHECK: sqdmlsl {{s[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+  return (int32_t)vqdmlslh_s16(a, b);
+}
+
+int64_t test_vqdmlsls_s32(int32_t a, int32_t b) {
+// CHECK: test_vqdmlsls_s32
+// CHECK: sqdmlsl {{d[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  return (int64_t)vqdmlsls_s32(a, b);
+}
+
+int32_t test_vqdmullh_s16(int16_t a, int16_t b) {
+// CHECK: test_vqdmullh_s16
+// CHECK: sqdmull {{s[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+  return (int32_t)vqdmullh_s16(a, b);
+}
+
+int64_t test_vqdmulls_s32(int32_t a, int32_t b) {
+// CHECK: test_vqdmulls_s32
+// CHECK: sqdmull {{d[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  return (int64_t)vqdmulls_s32(a, b);
+}