From 188d635f41e4f344ea468034e0e436cbe43f3a0d Mon Sep 17 00:00:00 2001 From: Luke Cheeseman Date: Fri, 12 Jun 2015 15:52:39 +0000 Subject: [PATCH] This patch makes the NEON intrinsics vget_lane_f16, vgetq_lane_f16, vset_lane_f16 and vsetq_lane_f16 available in AArch32. Differential Revision: http://reviews.llvm.org/D10388 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@239610 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/arm_neon.td | 13 +++++++------ test/CodeGen/arm_neon_intrinsics.c | 24 ++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td index 933f204bfb..c6f879513e 100644 --- a/include/clang/Basic/arm_neon.td +++ b/include/clang/Basic/arm_neon.td @@ -802,6 +802,13 @@ def VREINTERPRET def VFMA : SInst<"vfma", "dddd", "fQf">; +//////////////////////////////////////////////////////////////////////////////// +// fp16 vector operations +def SCALAR_HALF_GET_LANE : IOpInst<"vget_lane", "sdi", "h", OP_SCALAR_HALF_GET_LN>; +def SCALAR_HALF_SET_LANE : IOpInst<"vset_lane", "dsdi", "h", OP_SCALAR_HALF_SET_LN>; +def SCALAR_HALF_GET_LANEQ : IOpInst<"vget_lane", "sdi", "Qh", OP_SCALAR_HALF_GET_LNQ>; +def SCALAR_HALF_SET_LANEQ : IOpInst<"vset_lane", "dsdi", "Qh", OP_SCALAR_HALF_SET_LNQ>; + //////////////////////////////////////////////////////////////////////////////// // AArch64 Intrinsics @@ -1594,10 +1601,4 @@ def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "ssji", "SsSi", OP_SCALAR_ def SCALAR_VDUP_LANE : IInst<"vdup_lane", "sdi", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">; def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "sji", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">; - -// FIXME: Rename so it is obvious this only applies to halfs. -def SCALAR_HALF_GET_LANE : IOpInst<"vget_lane", "sdi", "h", OP_SCALAR_HALF_GET_LN>; -def SCALAR_HALF_SET_LANE : IOpInst<"vset_lane", "dsdi", "h", OP_SCALAR_HALF_SET_LN>; -def SCALAR_HALF_GET_LANEQ : IOpInst<"vget_lane", "sdi", "Qh", OP_SCALAR_HALF_GET_LNQ>; -def SCALAR_HALF_SET_LANEQ : IOpInst<"vset_lane", "dsdi", "Qh", OP_SCALAR_HALF_SET_LNQ>; } diff --git a/test/CodeGen/arm_neon_intrinsics.c b/test/CodeGen/arm_neon_intrinsics.c index 756e3b43fc..d92c32c476 100644 --- a/test/CodeGen/arm_neon_intrinsics.c +++ b/test/CodeGen/arm_neon_intrinsics.c @@ -2399,6 +2399,12 @@ float32_t test_vget_lane_f32(float32x2_t a) { return vget_lane_f32(a, 1); } +// CHECK-LABEL: test_vget_lane_f16 +// CHECK: vmov +float32_t test_vget_lane_f16(float16x4_t a) { + return vget_lane_f16(a, 1); +} + // CHECK-LABEL: test_vgetq_lane_u8 // CHECK: vmov uint8_t test_vgetq_lane_u8(uint8x16_t a) { @@ -2453,6 +2459,12 @@ float32_t test_vgetq_lane_f32(float32x4_t a) { return vgetq_lane_f32(a, 3); } +// CHECK-LABEL: test_vgetq_lane_f16 +// CHECK: vmov +float32_t test_vgetq_lane_f16(float16x8_t a) { + return vgetq_lane_f16(a, 3); +} + // CHECK-LABEL: test_vget_lane_s64 // The optimizer is able to remove all moves now. int64_t test_vget_lane_s64(int64x1_t a) { @@ -9157,6 +9169,12 @@ float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) { return vset_lane_f32(a, b, 1); } +// CHECK-LABEL: test_vset_lane_f16 +// CHECK: mov +float16x4_t test_vset_lane_f16(float16_t *a, float16x4_t b) { + return vset_lane_f16(*a, b, 1); +} + // CHECK-LABEL: test_vsetq_lane_u8 // CHECK: vmov uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) { @@ -9211,6 +9229,12 @@ float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) { return vsetq_lane_f32(a, b, 3); } +// CHECK-LABEL: test_vsetq_lane_f16 +// CHECK: vmov +float16x8_t test_vsetq_lane_f16(float16_t *a, float16x8_t b) { + return vsetq_lane_f16(*a, b, 3); +} + // CHECK-LABEL: test_vset_lane_s64 // The optimizer is able to get rid of all moves now. int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) { -- 2.40.0