From: Adhemerval Zanella Date: Thu, 6 Jun 2019 11:53:26 +0000 (+0000) Subject: [AArch64] Handle ISD::LROUND and ISD::LLROUND for float16 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e6843f5f5ddf0e973a09cb18ce5e44c1f6b89cc4;p=llvm [AArch64] Handle ISD::LROUND and ISD::LLROUND for float16 This patch is a follow up for D61391 to add lround/llround support for float16. Reviewed By: SjoerdMeijer Differential Revision: https://reviews.llvm.org/D62861 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362698 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index d9734eb3a12..dde05404365 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -3083,6 +3083,14 @@ defm : FPToIntegerPats; defm : FPToIntegerPats; defm : FPToIntegerPats; +let Predicates = [HasFullFP16] in { + def : Pat<(i32 (lround f16:$Rn)), + (!cast<Instruction>(FCVTASUWHr) f16:$Rn)>; + def : Pat<(i64 (lround f16:$Rn)), + (!cast<Instruction>(FCVTASUXHr) f16:$Rn)>; + def : Pat<(i64 (llround f16:$Rn)), + (!cast<Instruction>(FCVTASUXHr) f16:$Rn)>; +} def : Pat<(i32 (lround f32:$Rn)), (!cast<Instruction>(FCVTASUWSr) f32:$Rn)>; def : Pat<(i32 (lround f64:$Rn)), diff --git a/test/CodeGen/AArch64/llround-conv-fp16.ll b/test/CodeGen/AArch64/llround-conv-fp16.ll new file mode 100644 index 00000000000..5c914c09361 --- /dev/null +++ b/test/CodeGen/AArch64/llround-conv-fp16.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s + +; CHECK-LABEL: testmhhs: +; CHECK: fcvtas x0, h0 +; CHECK: ret +define i16 @testmhhs(half %x) { +entry: + %0 = tail call i64 @llvm.llround.i64.f16(half %x) + %conv = trunc i64 %0 to i16 + ret i16 %conv +} + +; CHECK-LABEL: testmhws: +; CHECK: fcvtas x0, h0 +; CHECK: ret +define i32 @testmhws(half %x) { +entry: + %0 = tail call i64 @llvm.llround.i64.f16(half %x) + %conv = trunc i64 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: testmhxs: +; CHECK: fcvtas x0, h0 
+; CHECK-NEXT: ret +define i64 @testmhxs(half %x) { +entry: + %0 = tail call i64 @llvm.llround.i64.f16(half %x) + ret i64 %0 +} + +declare i64 @llvm.llround.i64.f16(half) nounwind readnone diff --git a/test/CodeGen/AArch64/lround-conv-fp16-win.ll b/test/CodeGen/AArch64/lround-conv-fp16-win.ll new file mode 100644 index 00000000000..5eabc2a4f46 --- /dev/null +++ b/test/CodeGen/AArch64/lround-conv-fp16-win.ll @@ -0,0 +1,33 @@ +; RUN: llc < %s -mtriple=aarch64-windows -mattr=+fullfp16 | FileCheck %s + +; CHECK-LABEL: testmhhs: +; CHECK: fcvtas w0, h0 +; CHECK: ret +define i16 @testmhhs(half %x) { +entry: + %0 = tail call i32 @llvm.lround.i32.f16(half %x) + %conv = trunc i32 %0 to i16 + ret i16 %conv +} + +; CHECK-LABEL: testmhws: +; CHECK: fcvtas w0, h0 +; CHECK: ret +define i32 @testmhws(half %x) { +entry: + %0 = tail call i32 @llvm.lround.i32.f16(half %x) + ret i32 %0 +} + +; CHECK-LABEL: testmhxs: +; CHECK: fcvtas w8, h0 +; CHECK-NEXT: sxtw x0, w8 +; CHECK-NEXT: ret +define i64 @testmhxs(half %x) { +entry: + %0 = tail call i32 @llvm.lround.i32.f16(half %x) + %conv = sext i32 %0 to i64 + ret i64 %conv +} + +declare i32 @llvm.lround.i32.f16(half) nounwind readnone diff --git a/test/CodeGen/AArch64/lround-conv-fp16.ll b/test/CodeGen/AArch64/lround-conv-fp16.ll new file mode 100644 index 00000000000..cf81047f65e --- /dev/null +++ b/test/CodeGen/AArch64/lround-conv-fp16.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s + +; CHECK-LABEL: testmhhs: +; CHECK: fcvtas x0, h0 +; CHECK: ret +define i16 @testmhhs(half %x) { +entry: + %0 = tail call i64 @llvm.lround.i64.f16(half %x) + %conv = trunc i64 %0 to i16 + ret i16 %conv +} + +; CHECK-LABEL: testmhws: +; CHECK: fcvtas x0, h0 +; CHECK: ret +define i32 @testmhws(half %x) { +entry: + %0 = tail call i64 @llvm.lround.i64.f16(half %x) + %conv = trunc i64 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: testmhxs: +; CHECK: fcvtas x0, h0 +; CHECK-NEXT: ret +define i64 @testmhxs(half %x) { +entry: 
+ %0 = tail call i64 @llvm.lround.i64.f16(half %x) + ret i64 %0 +} + +declare i64 @llvm.lround.i64.f16(half) nounwind readnone