From b45925112631f42d35630ba7c2d6c76e26791c79 Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Sun, 26 May 2019 11:13:00 +0000
Subject: [PATCH] [ARM] Select a number of fp16 rounding functions

This add patterns for fp16 round and ceil etc. Same as the float and double
patterns.

Differential Revision: https://reviews.llvm.org/D62326


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@361718 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMISelLowering.cpp    |   2 +
 lib/Target/ARM/ARMInstrVFP.td         |   8 +-
 test/CodeGen/ARM/fp16-fullfp16.ll     | 108 ++++++++++++++++----------
 test/CodeGen/ARM/fp16-instructions.ll |  13 ----
 4 files changed, 72 insertions(+), 59 deletions(-)
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 24e67010908..677e4d5b2e8 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1156,6 +1156,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FLOG, MVT::f16, Promote);
     setOperationAction(ISD::FLOG10, MVT::f16, Promote);
     setOperationAction(ISD::FLOG2, MVT::f16, Promote);
+
+    setOperationAction(ISD::FROUND, MVT::f16, Legal);
   }
 
   if (Subtarget->hasNEON()) {
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 1f497f6d5bf..e3d2a947788 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -930,9 +930,9 @@ def VNEGH  : AHuI<0b11101, 0b11, 0b0001, 0b01, 0,
 
 multiclass vrint_inst_zrx<string opc, bit op, bit op2, SDPatternOperator node> {
   def H : AHuI<0b11101, 0b11, 0b0110, 0b11, 0,
-               (outs SPR:$Sd), (ins SPR:$Sm),
+               (outs HPR:$Sd), (ins HPR:$Sm),
                NoItinerary, !strconcat("vrint", opc), ".f16\t$Sd, $Sm",
-               []>,
+               [(set (f16 HPR:$Sd), (node (f16 HPR:$Sm)))]>,
                Requires<[HasFullFP16]> {
     let Inst{7} = op2;
     let Inst{16} = op;
@@ -975,9 +975,9 @@ multiclass vrint_inst_anpm<string opc, bits<2> rm,
   let PostEncoderMethod = "", DecoderNamespace = "VFPV8",
       isUnpredicable = 1 in {
     def H : AHuInp<0b11101, 0b11, 0b1000, 0b01, 0,
-                   (outs SPR:$Sd), (ins SPR:$Sm),
+                   (outs HPR:$Sd), (ins HPR:$Sm),
                    NoItinerary, !strconcat("vrint", opc, ".f16\t$Sd, $Sm"),
-                   []>,
+                   [(set (f16 HPR:$Sd), (node (f16 HPR:$Sm)))]>,
                    Requires<[HasFullFP16]> {
       let Inst{17-16} = rm;
     }
diff --git a/test/CodeGen/ARM/fp16-fullfp16.ll b/test/CodeGen/ARM/fp16-fullfp16.ll
index 5584e7ef88c..19afba05db6 100644
--- a/test/CodeGen/ARM/fp16-fullfp16.ll
+++ b/test/CodeGen/ARM/fp16-fullfp16.ll
@@ -488,53 +488,77 @@ define void @test_copysign(half* %p, half* %q) {
   ret void
 }
 
-; FIXME
-;define void @test_floor(half* %p) {
-;  %a = load half, half* %p, align 2
-;  %r = call half @llvm.floor.f16(half %a)
-;  store half %r, half* %p
-;  ret void
-;}
+define void @test_floor(half* %p) {
+; CHECK-LABEL: test_floor:
+; CHECK:         vldr.16 s0, [r0]
+; CHECK-NEXT:    vrintm.f16 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %r = call half @llvm.floor.f16(half %a)
+  store half %r, half* %p
+  ret void
+}
 
-; FIXME
-;define void @test_ceil(half* %p) {
-;  %a = load half, half* %p, align 2
-;  %r = call half @llvm.ceil.f16(half %a)
-;  store half %r, half* %p
-;  ret void
-;}
+define void @test_ceil(half* %p) {
+; CHECK-LABEL: test_ceil:
+; CHECK:         vldr.16 s0, [r0]
+; CHECK-NEXT:    vrintp.f16 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %r = call half @llvm.ceil.f16(half %a)
+  store half %r, half* %p
+  ret void
+}
 
-; FIXME
-;define void @test_trunc(half* %p) {
-;  %a = load half, half* %p, align 2
-;  %r = call half @llvm.trunc.f16(half %a)
-;  store half %r, half* %p
-;  ret void
-;}
+define void @test_trunc(half* %p) {
+; CHECK-LABEL: test_trunc:
+; CHECK:         vldr.16 s0, [r0]
+; CHECK-NEXT:    vrintz.f16 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %r = call half @llvm.trunc.f16(half %a)
+  store half %r, half* %p
+  ret void
+}
 
-; FIXME
-;define void @test_rint(half* %p) {
-;  %a = load half, half* %p, align 2
-;  %r = call half @llvm.rint.f16(half %a)
-;  store half %r, half* %p
-;  ret void
-;}
+define void @test_rint(half* %p) {
+; CHECK-LABEL: test_rint:
+; CHECK:         vldr.16 s0, [r0]
+; CHECK-NEXT:    vrintx.f16 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %r = call half @llvm.rint.f16(half %a)
+  store half %r, half* %p
+  ret void
+}
 
-; FIXME
-;define void @test_nearbyint(half* %p) {
-;  %a = load half, half* %p, align 2
-;  %r = call half @llvm.nearbyint.f16(half %a)
-;  store half %r, half* %p
-;  ret void
-;}
+define void @test_nearbyint(half* %p) {
+; CHECK-LABEL: test_nearbyint:
+; CHECK:         vldr.16 s0, [r0]
+; CHECK-NEXT:    vrintr.f16 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %r = call half @llvm.nearbyint.f16(half %a)
+  store half %r, half* %p
+  ret void
+}
 
-; FIXME
-;define void @test_round(half* %p) {
-;  %a = load half, half* %p, align 2
-;  %r = call half @llvm.round.f16(half %a)
-;  store half %r, half* %p
-;  ret void
-;}
+define void @test_round(half* %p) {
+; CHECK-LABEL: test_round:
+; CHECK:         vldr.16 s0, [r0]
+; CHECK-NEXT:    vrinta.f16 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %r = call half @llvm.round.f16(half %a)
+  store half %r, half* %p
+  ret void
+}
 
 define void @test_fmuladd(half* %p, half* %q, half* %r) {
 ; CHECK-LABEL: test_fmuladd:
diff --git a/test/CodeGen/ARM/fp16-instructions.ll b/test/CodeGen/ARM/fp16-instructions.ll
index ef0d4834f5a..514d3c7ae0a 100644
--- a/test/CodeGen/ARM/fp16-instructions.ll
+++ b/test/CodeGen/ARM/fp16-instructions.ll
@@ -43,8 +43,6 @@ entry:
 ; CHECK-HARDFP-FULLFP16:  {{.*}} lr
 }
 
-; 1. VABS: TODO
-
 ; 2. VADD
 define float @Add(float %a.coerce, float %b.coerce) {
 entry:
@@ -691,15 +689,6 @@ entry:
 ; CHECK-HARDFP-FULLFP16:       vnmul.f16  s0, s0, s1
 }
 
-; TODO:
-; 28. VRINTA
-; 29. VRINTM
-; 30. VRINTN
-; 31. VRINTP
-; 32. VRINTR
-; 33. VRINTX
-; 34. VRINTZ
-
 ; 35. VSELEQ
 define half @select_cc1(half* %a0)  {
   %1 = load half, half* %a0
@@ -955,8 +944,6 @@ entry:
 ; CHECK-SOFTFP-FP16-T32-NEXT:  vcvtb.f16.f32 s0, [[S4]]
 }
 
-; 39. VSQRT - TODO
-
 ; 40. VSUB
 define float @Sub(float %a.coerce, float %b.coerce) {
 entry:
-- 
2.40.0