From b45925112631f42d35630ba7c2d6c76e26791c79 Mon Sep 17 00:00:00 2001 From: David Green Date: Sun, 26 May 2019 11:13:00 +0000 Subject: [PATCH] [ARM] Select a number of fp16 rounding functions This adds patterns for fp16 round, ceil, etc. Same as the float and double patterns. Differential Revision: https://reviews.llvm.org/D62326 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@361718 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 2 + lib/Target/ARM/ARMInstrVFP.td | 8 +- test/CodeGen/ARM/fp16-fullfp16.ll | 108 ++++++++++++++++---------- test/CodeGen/ARM/fp16-instructions.ll | 13 ---- 4 files changed, 72 insertions(+), 59 deletions(-) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 24e67010908..677e4d5b2e8 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1156,6 +1156,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FLOG, MVT::f16, Promote); setOperationAction(ISD::FLOG10, MVT::f16, Promote); setOperationAction(ISD::FLOG2, MVT::f16, Promote); + + setOperationAction(ISD::FROUND, MVT::f16, Legal); } if (Subtarget->hasNEON()) { diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 1f497f6d5bf..e3d2a947788 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -930,9 +930,9 @@ def VNEGH : AHuI<0b11101, 0b11, 0b0001, 0b01, 0, multiclass vrint_inst_zrx { def H : AHuI<0b11101, 0b11, 0b0110, 0b11, 0, - (outs SPR:$Sd), (ins SPR:$Sm), + (outs HPR:$Sd), (ins HPR:$Sm), NoItinerary, !strconcat("vrint", opc), ".f16\t$Sd, $Sm", - []>, + [(set (f16 HPR:$Sd), (node (f16 HPR:$Sm)))]>, Requires<[HasFullFP16]> { let Inst{7} = op2; let Inst{16} = op; @@ -975,9 +975,9 @@ multiclass vrint_inst_anpm rm, let PostEncoderMethod = "", DecoderNamespace = "VFPV8", isUnpredicable = 1 in { def H : AHuInp<0b11101, 0b11, 0b1000, 0b01, 0, - (outs SPR:$Sd), (ins SPR:$Sm), + (outs 
HPR:$Sd), (ins HPR:$Sm), NoItinerary, !strconcat("vrint", opc, ".f16\t$Sd, $Sm"), - []>, + [(set (f16 HPR:$Sd), (node (f16 HPR:$Sm)))]>, Requires<[HasFullFP16]> { let Inst{17-16} = rm; } diff --git a/test/CodeGen/ARM/fp16-fullfp16.ll b/test/CodeGen/ARM/fp16-fullfp16.ll index 5584e7ef88c..19afba05db6 100644 --- a/test/CodeGen/ARM/fp16-fullfp16.ll +++ b/test/CodeGen/ARM/fp16-fullfp16.ll @@ -488,53 +488,77 @@ define void @test_copysign(half* %p, half* %q) { ret void } -; FIXME -;define void @test_floor(half* %p) { -; %a = load half, half* %p, align 2 -; %r = call half @llvm.floor.f16(half %a) -; store half %r, half* %p -; ret void -;} +define void @test_floor(half* %p) { +; CHECK-LABEL: test_floor: +; CHECK: vldr.16 s0, [r0] +; CHECK-NEXT: vrintm.f16 s0, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr + %a = load half, half* %p, align 2 + %r = call half @llvm.floor.f16(half %a) + store half %r, half* %p + ret void +} -; FIXME -;define void @test_ceil(half* %p) { -; %a = load half, half* %p, align 2 -; %r = call half @llvm.ceil.f16(half %a) -; store half %r, half* %p -; ret void -;} +define void @test_ceil(half* %p) { +; CHECK-LABEL: test_ceil: +; CHECK: vldr.16 s0, [r0] +; CHECK-NEXT: vrintp.f16 s0, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr + %a = load half, half* %p, align 2 + %r = call half @llvm.ceil.f16(half %a) + store half %r, half* %p + ret void +} -; FIXME -;define void @test_trunc(half* %p) { -; %a = load half, half* %p, align 2 -; %r = call half @llvm.trunc.f16(half %a) -; store half %r, half* %p -; ret void -;} +define void @test_trunc(half* %p) { +; CHECK-LABEL: test_trunc: +; CHECK: vldr.16 s0, [r0] +; CHECK-NEXT: vrintz.f16 s0, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr + %a = load half, half* %p, align 2 + %r = call half @llvm.trunc.f16(half %a) + store half %r, half* %p + ret void +} -; FIXME -;define void @test_rint(half* %p) { -; %a = load half, half* %p, align 2 -; %r = call half @llvm.rint.f16(half %a) -; store 
half %r, half* %p -; ret void -;} +define void @test_rint(half* %p) { +; CHECK-LABEL: test_rint: +; CHECK: vldr.16 s0, [r0] +; CHECK-NEXT: vrintx.f16 s0, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr + %a = load half, half* %p, align 2 + %r = call half @llvm.rint.f16(half %a) + store half %r, half* %p + ret void +} -; FIXME -;define void @test_nearbyint(half* %p) { -; %a = load half, half* %p, align 2 -; %r = call half @llvm.nearbyint.f16(half %a) -; store half %r, half* %p -; ret void -;} +define void @test_nearbyint(half* %p) { +; CHECK-LABEL: test_nearbyint: +; CHECK: vldr.16 s0, [r0] +; CHECK-NEXT: vrintr.f16 s0, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr + %a = load half, half* %p, align 2 + %r = call half @llvm.nearbyint.f16(half %a) + store half %r, half* %p + ret void +} -; FIXME -;define void @test_round(half* %p) { -; %a = load half, half* %p, align 2 -; %r = call half @llvm.round.f16(half %a) -; store half %r, half* %p -; ret void -;} +define void @test_round(half* %p) { +; CHECK-LABEL: test_round: +; CHECK: vldr.16 s0, [r0] +; CHECK-NEXT: vrinta.f16 s0, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr + %a = load half, half* %p, align 2 + %r = call half @llvm.round.f16(half %a) + store half %r, half* %p + ret void +} define void @test_fmuladd(half* %p, half* %q, half* %r) { ; CHECK-LABEL: test_fmuladd: diff --git a/test/CodeGen/ARM/fp16-instructions.ll b/test/CodeGen/ARM/fp16-instructions.ll index ef0d4834f5a..514d3c7ae0a 100644 --- a/test/CodeGen/ARM/fp16-instructions.ll +++ b/test/CodeGen/ARM/fp16-instructions.ll @@ -43,8 +43,6 @@ entry: ; CHECK-HARDFP-FULLFP16: {{.*}} lr } -; 1. VABS: TODO - ; 2. VADD define float @Add(float %a.coerce, float %b.coerce) { entry: @@ -691,15 +689,6 @@ entry: ; CHECK-HARDFP-FULLFP16: vnmul.f16 s0, s0, s1 } -; TODO: -; 28. VRINTA -; 29. VRINTM -; 30. VRINTN -; 31. VRINTP -; 32. VRINTR -; 33. VRINTX -; 34. VRINTZ - ; 35. 
VSELEQ define half @select_cc1(half* %a0) { %1 = load half, half* %a0 @@ -955,8 +944,6 @@ entry: ; CHECK-SOFTFP-FP16-T32-NEXT: vcvtb.f16.f32 s0, [[S4]] } -; 39. VSQRT - TODO - ; 40. VSUB define float @Sub(float %a.coerce, float %b.coerce) { entry: -- 2.40.0