From de5be4657f8311b78bcea068c282c61351efcdfb Mon Sep 17 00:00:00 2001
From: Bradley Smith
Date: Mon, 23 Mar 2015 15:59:54 +0000
Subject: [PATCH] [ARM] Add more pattern matching for f16 <-> f64 conversions

Specifically when the conversion is done in two steps, f16 -> f32 -> f64.

For example:

  %1 = tail call float @llvm.convert.from.fp16.f32(i16 %0)
  %conv = fpext float %1 to double

to:

  vcvtb.f64.f16

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@232954 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMInstrVFP.td | 14 ++++++++++++++
 test/CodeGen/ARM/fp16-64.ll   | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+)
 create mode 100644 test/CodeGen/ARM/fp16-64.ll

diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index e0a93149916..cc8a2b0b4c6 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -627,6 +627,20 @@ def : Pat<(f16_to_fp GPR:$a),
 def : Pat<(f64 (f16_to_fp GPR:$a)),
           (VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>;
 
+// f16 -> f32 -> f64 done in two steps: both extensions are exact, so the pair
+// can be selected as a single vcvtb.f64.f16.
+def : Pat<(f64 (fextend (f16_to_fp GPR:$a))),
+          (VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>,
+          Requires<[HasFPARMv8, HasDPVFP]>;
+
+// f64 -> f32 -> f16 done in two steps, selected as a single
+// vcvtb.f16.f64.
+// NOTE(review): rounding once (f64 -> f16) is not always bit-identical to
+// rounding twice (f64 -> f32 -> f16); confirm this fold is acceptable.
+def : Pat<(fp_to_f16 (fround (f64 DPR:$a))),
+          (i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>,
+          Requires<[HasFPARMv8, HasDPVFP]>;
+
 multiclass vcvt_inst<string opc, bits<2> rm,
                      SDPatternOperator node = null_frag> {
   let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
diff --git a/test/CodeGen/ARM/fp16-64.ll b/test/CodeGen/ARM/fp16-64.ll
new file mode 100644
index 00000000000..854ba9adb84
--- /dev/null
+++ b/test/CodeGen/ARM/fp16-64.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=arm -mattr=+fp-armv8 < %s | \
+; RUN:   FileCheck --check-prefix=CHECK --check-prefix=V8 %s
+; RUN: llc -mtriple=arm -mattr=+vfp3,+d16 < %s | \
+; RUN:   FileCheck --check-prefix=CHECK --check-prefix=NOV8 %s
+
+; Two-step f16 <-> f64 conversions (through f32) should select a single vcvtb
+; when fp-armv8 is available and must not when it is absent. Run-specific
+; checks use the bare V8 / NOV8 prefixes passed to FileCheck above.
+
+declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
+declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
+
+define void @vcvt_f64_f16(i16* %x, double* %y) nounwind {
+entry:
+; CHECK-LABEL: vcvt_f64_f16
+  %0 = load i16, i16* %x, align 2
+  %1 = tail call float @llvm.convert.from.fp16.f32(i16 %0)
+  %conv = fpext float %1 to double
+; V8: vcvtb.f64.f16
+; NOV8-NOT: vcvtb.f64.f16
+  store double %conv, double* %y, align 8
+  ret void
+}
+
+define void @vcvt_f16_f64(i16* %x, double* %y) nounwind {
+entry:
+; CHECK-LABEL: vcvt_f16_f64
+  %0 = load double, double* %y, align 8
+  %conv = fptrunc double %0 to float
+; V8: vcvtb.f16.f64
+; NOV8-NOT: vcvtb.f16.f64
+  %1 = tail call i16 @llvm.convert.to.fp16.f32(float %conv)
+  store i16 %1, i16* %x, align 2
+  ret void
+}
-- 
2.40.0