From: Chad Rosier Date: Mon, 7 Oct 2013 17:07:17 +0000 (+0000) Subject: [AArch64] Add support for NEON scalar arithmetic instructions: X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=48f98fe7c1374bf416403bf82b29d4fc11011057;p=clang [AArch64] Add support for NEON scalar arithmetic instructions: SQDMULH, SQRDMULH, FMULX, FRECPS, and FRSQRTS. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@192112 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td index 10150249b4..3482b8f77f 100644 --- a/include/clang/Basic/arm_neon.td +++ b/include/clang/Basic/arm_neon.td @@ -745,4 +745,24 @@ def SCALAR_FMINP : SInst<"vpmin", "sd", "SfSQd">; def SCALAR_FMAXNMP : SInst<"vpmaxnm", "sd", "SfSQd">; def SCALAR_FMINNMP : SInst<"vpminnm", "sd", "SfSQd">; +//////////////////////////////////////////////////////////////////////////////// +// Scalar Integer Saturating Doubling Multiply Half High +def SCALAR_SQDMULH : SInst<"vqdmulh", "sss", "SsSi">; + +//////////////////////////////////////////////////////////////////////////////// +// Scalar Integer Saturating Rounding Doubling Multiply Half High +def SCALAR_SQRDMULH : SInst<"vqrdmulh", "sss", "SsSi">; + +//////////////////////////////////////////////////////////////////////////////// +// Scalar Floating-point Multiply Extended +def SCALAR_FMULX : IInst<"vmulx", "sss", "SfSd">; + +//////////////////////////////////////////////////////////////////////////////// +// Scalar Floating-point Reciprocal Step +def SCALAR_FRECPS : IInst<"vrecps", "sss", "SfSd">; + +//////////////////////////////////////////////////////////////////////////////// +// Scalar Floating-point Reciprocal Square Root Step +def SCALAR_FRSQRTS : IInst<"vrsqrts", "sss", "SfSd">; + } diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 52e40db540..9c526107ed 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -1945,6 +1945,31 @@ static Value 
*EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF, case AArch64::BI__builtin_neon_vminnmvq_f32: Int = Intrinsic::aarch64_neon_vminnmv; AcrossVec = true; ExtendEle = false; s = "vminnmv"; break; + // Scalar Integer Saturating Doubling Multiply Half High + case AArch64::BI__builtin_neon_vqdmulhh_s16: + case AArch64::BI__builtin_neon_vqdmulhs_s32: + Int = Intrinsic::arm_neon_vqdmulh; + s = "vqdmulh"; OverloadInt = true; break; + // Scalar Integer Saturating Rounding Doubling Multiply Half High + case AArch64::BI__builtin_neon_vqrdmulhh_s16: + case AArch64::BI__builtin_neon_vqrdmulhs_s32: + Int = Intrinsic::arm_neon_vqrdmulh; + s = "vqrdmulh"; OverloadInt = true; break; + // Scalar Floating-point Multiply Extended + case AArch64::BI__builtin_neon_vmulxs_f32: + case AArch64::BI__builtin_neon_vmulxd_f64: + Int = Intrinsic::aarch64_neon_vmulx; + s = "vmulx"; OverloadInt = true; break; + // Scalar Floating-point Reciprocal Step + case AArch64::BI__builtin_neon_vrecpss_f32: + case AArch64::BI__builtin_neon_vrecpsd_f64: + Int = Intrinsic::arm_neon_vrecps; + s = "vrecps"; OverloadInt = true; break; + // Scalar Floating-point Reciprocal Square Root Step + case AArch64::BI__builtin_neon_vrsqrtss_f32: + case AArch64::BI__builtin_neon_vrsqrtsd_f64: + Int = Intrinsic::arm_neon_vrsqrts; + s = "vrsqrts"; OverloadInt = true; break; } if (!Int) diff --git a/test/CodeGen/aarch64-neon-intrinsics.c b/test/CodeGen/aarch64-neon-intrinsics.c index b71256a665..07b15b55f6 100644 --- a/test/CodeGen/aarch64-neon-intrinsics.c +++ b/test/CodeGen/aarch64-neon-intrinsics.c @@ -5538,3 +5538,62 @@ float64_t test_vpminqd_f64(float64x2_t a) { // CHECK: fminp {{d[0-9]+}}, {{v[0-9]+}}.2d } +int16_t test_vqdmulhh_s16(int16_t a, int16_t b) { +// CHECK: test_vqdmulhh_s16 + return vqdmulhh_s16(a, b); +// CHECK: sqdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} +} + +int32_t test_vqdmulhs_s32(int32_t a, int32_t b) { +// CHECK: test_vqdmulhs_s32 + return vqdmulhs_s32(a, b); +// CHECK: sqdmulh {{s[0-9]+}}, 
{{s[0-9]+}}, {{s[0-9]+}} +} + +int16_t test_vqrdmulhh_s16(int16_t a, int16_t b) { +// CHECK: test_vqrdmulhh_s16 + return vqrdmulhh_s16(a, b); +// CHECK: sqrdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} +} + +int32_t test_vqrdmulhs_s32(int32_t a, int32_t b) { +// CHECK: test_vqrdmulhs_s32 + return vqrdmulhs_s32(a, b); +// CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +} + +float32_t test_vmulxs_f32(float32_t a, float32_t b) { +// CHECK: test_vmulxs_f32 + return vmulxs_f32(a, b); +// CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +} + +float64_t test_vmulxd_f64(float64_t a, float64_t b) { +// CHECK: test_vmulxd_f64 + return vmulxd_f64(a, b); +// CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +float32_t test_vrecpss_f32(float32_t a, float32_t b) { +// CHECK: test_vrecpss_f32 + return vrecpss_f32(a, b); +// CHECK: frecps {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +} + +float64_t test_vrecpsd_f64(float64_t a, float64_t b) { +// CHECK: test_vrecpsd_f64 + return vrecpsd_f64(a, b); +// CHECK: frecps {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +float32_t test_vrsqrtss_f32(float32_t a, float32_t b) { +// CHECK: test_vrsqrtss_f32 + return vrsqrtss_f32(a, b); +// CHECK: frsqrts {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +} + +float64_t test_vrsqrtsd_f64(float64_t a, float64_t b) { +// CHECK: test_vrsqrtsd_f64 + return vrsqrtsd_f64(a, b); +// CHECK: frsqrts {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +}