From ba3f83af6f4f3bd1d44fcd42399b3d932f2eebe0 Mon Sep 17 00:00:00 2001 From: "Diogo N. Sampaio" Date: Wed, 10 Apr 2019 13:28:06 +0000 Subject: [PATCH] [ARM] [FIX] Add missing f16 vector operations lowering Summary: Add the missing <8xhalf> shufflevector pattern, used when the shuffle is lowered to a concat_vectors DAG node. Also allow <8xhalf> and <4xhalf> vldup1 operations. These instructions are required for the v8.2a fp16 lowering of the vmul_n_f16, vmulq_n_f16 and vmulq_lane_f16 intrinsics. Reviewers: olista01, pbarrio, LukeGeeson, efriedma Reviewed By: efriedma Subscribers: efriedma, javed.absar, kristof.beyls, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D60319 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358081 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelDAGToDAG.cpp | 5 ++- lib/Target/ARM/ARMInstrNEON.td | 2 ++ .../ARM/armv8.2a-fp16-vector-intrinsics.ll | 36 +++++++++++++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 5facc8d6b11..b9c4317c9cf 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -2212,7 +2212,10 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic, case MVT::v8i8: case MVT::v16i8: OpcodeIndex = 0; break; case MVT::v4i16: - case MVT::v8i16: OpcodeIndex = 1; break; + case MVT::v8i16: + case MVT::v4f16: + case MVT::v8f16: + OpcodeIndex = 1; break; case MVT::v2f32: case MVT::v2i32: case MVT::v4f32: diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 2c996b571d6..ba5e255568b 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -7576,6 +7576,8 @@ def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)), (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>; def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)), (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>; +def : Pat<(v8f16 (concat_vectors DPR:$Dn, DPR:$Dm)), + 
(REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>; //===----------------------------------------------------------------------===// // Assembler aliases diff --git a/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll b/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll index a9c5838f104..442ae5dfbbb 100644 --- a/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll +++ b/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll @@ -1225,6 +1225,42 @@ entry: ret <8 x half> %shuffle.i } +define <4 x half> @test_vld_dup1_4xhalf(half* %b) { +; CHECK-LABEL: test_vld_dup1_4xhalf: +; CHECK: vld1.16 {d0[]}, [r0:16] +; CHECK-NEXT: bx lr + +entry: + %b1 = load half, half* %b, align 2 + %vecinit = insertelement <4 x half> undef, half %b1, i32 0 + %vecinit2 = insertelement <4 x half> %vecinit, half %b1, i32 1 + %vecinit3 = insertelement <4 x half> %vecinit2, half %b1, i32 2 + %vecinit4 = insertelement <4 x half> %vecinit3, half %b1, i32 3 + ret <4 x half> %vecinit4 +} + +define <8 x half> @test_vld_dup1_8xhalf(half* %b) local_unnamed_addr { +; CHECK-LABEL: test_vld_dup1_8xhalf: +; CHECK: vld1.16 {d0[], d1[]}, [r0:16] +; CHECK-NEXT: bx lr + +entry: + %b1 = load half, half* %b, align 2 + %vecinit = insertelement <8 x half> undef, half %b1, i32 0 + %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer + ret <8 x half> %vecinit8 +} + +define <8 x half> @test_shufflevector8xhalf(<4 x half> %a) { +; CHECK-LABEL: test_shufflevector8xhalf: +; CHECK: vmov.f64 d1, d0 +; CHECK-NEXT: bx lr + +entry: + %r = shufflevector <4 x half> %a, <4 x half> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + ret <8 x half> %r +} + declare <4 x half> @llvm.fabs.v4f16(<4 x half>) declare <8 x half> @llvm.fabs.v8f16(<8 x half>) declare <4 x i16> @llvm.arm.neon.vcvtas.v4i16.v4f16(<4 x half>) -- 2.50.1