From 0c3a20c021ed083cdd6ea9a4459ca8acf6a752af Mon Sep 17 00:00:00 2001
From: Tim Northover
Date: Tue, 25 Feb 2014 11:13:49 +0000
Subject: [PATCH] AArch64 NEON: add 64-bit scalar intrinsics for _f64 mla/mls etc.

These seem to be supported by GCC, and do make sense architecturally so we
should probably have them.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@202138 91177308-0d34-0410-b5e6-96231b3b80d8
---

NOTE(review): this patch was rebuilt from a copy whose line breaks had been
collapsed into spaces. The line structure below matches every hunk header and
the diffstat, but the original indentation and column alignment could not be
recovered (two-space C indentation and single spaces in the .td lines are
assumed). If a context line fails to match, apply with --ignore-whitespace.

 include/clang/Basic/arm_neon.td | 10 +++++++---
 test/CodeGen/aarch64-neon-fma.c | 29 +++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td
index b1ae1db0f6..f1b07b70ee 100644
--- a/include/clang/Basic/arm_neon.td
+++ b/include/clang/Basic/arm_neon.td
@@ -623,10 +623,14 @@ def FMLA : SInst<"vfma", "dddd", "dQd">;
 def FMLS : SInst<"vfms", "dddd", "fdQfQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
-// MUL, FMA, FMS definitions with scalar argument
+// MUL, MLA, MLS, FMA, FMS definitions with scalar argument
 def VMUL_N_A64 : IOpInst<"vmul_n", "dds", "Qd", OP_MUL_N>;
-def FMLA_N : SOpInst<"vfma_n", "ddds", "fQf", OP_FMLA_N>;
-def FMLS_N : SOpInst<"vfms_n", "ddds", "fQf", OP_FMLS_N>;
+
+def FMLA_N : SOpInst<"vfma_n", "ddds", "fQfQd", OP_FMLA_N>;
+def FMLS_N : SOpInst<"vfms_n", "ddds", "fQfQd", OP_FMLS_N>;
+
+def MLA_N : SOpInst<"vmla_n", "ddds", "Qd", OP_MLA_N>;
+def MLS_N : SOpInst<"vmls_n", "ddds", "Qd", OP_MLS_N>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Logical operations
diff --git a/test/CodeGen/aarch64-neon-fma.c b/test/CodeGen/aarch64-neon-fma.c
index b3a54be147..2e549ed44b 100644
--- a/test/CodeGen/aarch64-neon-fma.c
+++ b/test/CodeGen/aarch64-neon-fma.c
@@ -26,6 +26,15 @@ float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
   // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
 }
 
+float64x2_t test_vmlaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
+  // CHECK-LABEL: test_vmlaq_n_f64
+  return vmlaq_n_f64(a, b, c);
+  // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+  // CHECK: fadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+  // CHECK-FMA: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+  // CHECK-FMA: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
 float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
   // CHECK-LABEL: test_vmlsq_n_f32
   return vmlsq_n_f32(a, b, c);
@@ -44,6 +53,15 @@ float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
   // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 }
 
+float64x2_t test_vmlsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
+  // CHECK-LABEL: test_vmlsq_n_f64
+  return vmlsq_n_f64(a, b, c);
+  // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+  // CHECK: fsub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+  // CHECK-FMA: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+  // CHECK-FMA: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
 float32x2_t test_vmla_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
   // CHECK-LABEL: test_vmla_lane_f32_0
   return vmla_lane_f32(a, b, v, 0);
@@ -171,3 +189,14 @@ float32x4_t test_vmlsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
   // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
 }
 
+float64x2_t test_vfmaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
+  // CHECK-LABEL: test_vfmaq_n_f64:
+  return vfmaq_n_f64(a, b, c);
+  // CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+float64x2_t test_vfmsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
+  // CHECK-LABEL: test_vfmsq_n_f64:
+  return vfmsq_n_f64(a, b, c);
+  // CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
-- 
2.40.0