From: Chad Rosier
Date: Mon, 2 Dec 2013 21:07:27 +0000 (+0000)
Subject: [AArch64] Implemented vcopy_lane patterns using scalar DUP instruction.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5e2f8fdda97247644715af72b2473711cf2b0df2;p=clang

[AArch64] Implemented vcopy_lane patterns using scalar DUP instruction.
Patch by Ana Pazos!

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@196153 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td
index 4ae0859665..3ad1fc84cc 100644
--- a/include/clang/Basic/arm_neon.td
+++ b/include/clang/Basic/arm_neon.td
@@ -836,13 +836,13 @@ def GET_LANE : IInst<"vget_lane", "sdi",
 def SET_LANE : IInst<"vset_lane", "dsdi",
                      "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPsfdQfQdPlQPl">;
 def COPY_LANE : IOpInst<"vcopy_lane", "ddidi",
-                        "csiPcPsUcUsUiPcPsfPl", OP_COPY_LN>;
+                        "csilPcPsUcUsUiUlPcPsPlfd", OP_COPY_LN>;
 def COPYQ_LANE : IOpInst<"vcopy_lane", "ddigi",
                         "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPYQ_LN>;
 def COPY_LANEQ : IOpInst<"vcopy_laneq", "ddiki",
-                     "csiPcPsUcUsUif", OP_COPY_LNQ>;
+                     "csilPcPsPlUcUsUiUlf", OP_COPY_LNQ>;
 def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "ddidi",
-                     "QcQsQiQlQUcQUsQUiQUlQPcQPsQfdQPl", OP_COPY_LN>;
+                     "QcQsQiQlQUcQUsQUiQUlQPcQPsQfdQdQPl", OP_COPY_LN>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Set all lanes to same value
diff --git a/test/CodeGen/aarch64-neon-copy.c b/test/CodeGen/aarch64-neon-copy.c
index 7c77b177af..6bc6f005f3 100644
--- a/test/CodeGen/aarch64-neon-copy.c
+++ b/test/CodeGen/aarch64-neon-copy.c
@@ -1192,3 +1192,56 @@ float64x2_t test_vmovq_n_f64(float64_t v1) {
   return vmovq_n_f64(v1);
   // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
 }
+
+// CHECK: test_vcopy_lane_s64
+int64x1_t test_vcopy_lane_s64(int64x1_t a, int64x1_t c) {
+  return vcopy_lane_s64(a, 0, c, 0);
+// CHECK: fmov {{d[0-9]+}}, {{d[0-9]+}}
+// CHECK-NOT: dup {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+}
+
+// CHECK: test_vcopy_lane_u64
+uint64x1_t test_vcopy_lane_u64(uint64x1_t a, uint64x1_t c) {
+  return vcopy_lane_u64(a, 0, c, 0);
+// CHECK: fmov {{d[0-9]+}}, {{d[0-9]+}}
+// CHECK-NOT: dup {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+}
+
+// CHECK: test_vcopy_lane_f64
+float64x1_t test_vcopy_lane_f64(float64x1_t a, float64x1_t c) {
+  return vcopy_lane_f64(a, 0, c, 0);
+// CHECK: fmov {{d[0-9]+}}, {{d[0-9]+}}
+// CHECK-NOT: dup {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+}
+
+// CHECK: test_vcopy_laneq_s64
+int64x1_t test_vcopy_laneq_s64(int64x1_t a, int64x2_t c) {
+  return vcopy_laneq_s64(a, 0, c, 1);
+// CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+}
+
+// CHECK: test_vcopy_laneq_u64
+uint64x1_t test_vcopy_laneq_u64(uint64x1_t a, uint64x2_t c) {
+  return vcopy_laneq_u64(a, 0, c, 1);
+// CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+}
+
+// CHECK: test_vcopy_laneq_f64
+float64x1_t test_vcopy_laneq_f64(float64x1_t a, float64x1_t c) {
+  return vcopy_laneq_f64(a, 0, c, 0);
+// CHECK: fmov {{d[0-9]+}}, {{d[0-9]+}}
+// CHECK-NOT: dup {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+}
+
+// CHECK: test_vcopy_laneq_p64
+poly64x1_t test_vcopy_laneq_p64(poly64x1_t a, poly64x2_t c) {
+  return vcopy_laneq_p64(a, 0, c, 1);
+// CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+}
+
+// CHECK: test_vcopyq_laneq_f64
+float64x2_t test_vcopyq_laneq_f64(float64x2_t a, float64x2_t c) {
+// CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[1]
+  return vcopyq_laneq_f64(a, 1, c, 1);
+}
+