From f173f479c2e5b75a94a2f6706d745732bc68ad6e Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Fri, 13 Jul 2018 22:09:30 +0000
Subject: [PATCH] [X86][FastISel] Support uitofp with avx512.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@337055 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86FastISel.cpp                |  34 +++-
 .../fast-isel-uint-float-conversion-x86-64.ll |  69 ++++++++
 .../X86/fast-isel-uint-float-conversion.ll    | 160 ++++++++++++++++++
 3 files changed, 255 insertions(+), 8 deletions(-)
 create mode 100644 test/CodeGen/X86/fast-isel-uint-float-conversion-x86-64.ll
 create mode 100644 test/CodeGen/X86/fast-isel-uint-float-conversion.ll

diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index acbf01b3318..de8b40f28a8 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -134,6 +134,8 @@ private:
   bool X86SelectFPExt(const Instruction *I);
   bool X86SelectFPTrunc(const Instruction *I);
   bool X86SelectSIToFP(const Instruction *I);
+  bool X86SelectUIToFP(const Instruction *I);
+  bool X86SelectIntToFP(const Instruction *I, bool IsSigned);
 
   const X86InstrInfo *getInstrInfo() const {
     return Subtarget->getInstrInfo();
@@ -2410,11 +2412,14 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) {
   return false;
 }
 
-bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
+// Common code for X86SelectSIToFP and X86SelectUIToFP.
+bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) {
   // The target-independent selection algorithm in FastISel already knows how
   // to select a SINT_TO_FP if the target is SSE but not AVX.
   // Early exit if the subtarget doesn't have AVX.
-  if (!Subtarget->hasAVX())
+  // Unsigned conversion (VCVTUSI2SS/SD) is only available with AVX512.
+  bool HasAVX512 = Subtarget->hasAVX512();
+  if (!Subtarget->hasAVX() || (!IsSigned && !HasAVX512))
     return false;
 
   // TODO: We could sign extend narrower types.
@@ -2429,21 +2434,24 @@ bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
 
   unsigned Opcode;
 
-  static const uint16_t CvtOpc[2][2][2] = {
+  static const uint16_t SCvtOpc[2][2][2] = {
     { { X86::VCVTSI2SSrr,  X86::VCVTSI642SSrr },
       { X86::VCVTSI2SDrr,  X86::VCVTSI642SDrr } },
     { { X86::VCVTSI2SSZrr, X86::VCVTSI642SSZrr },
       { X86::VCVTSI2SDZrr, X86::VCVTSI642SDZrr } },
   };
-  bool HasAVX512 = Subtarget->hasAVX512();
+  static const uint16_t UCvtOpc[2][2] = {
+    { X86::VCVTUSI2SSZrr, X86::VCVTUSI642SSZrr },
+    { X86::VCVTUSI2SDZrr, X86::VCVTUSI642SDZrr },
+  };
   bool Is64Bit = SrcVT == MVT::i64;
 
   if (I->getType()->isDoubleTy()) {
-    // sitofp int -> double
-    Opcode = CvtOpc[HasAVX512][1][Is64Bit];
+    // s/uitofp int -> double
+    Opcode = IsSigned ? SCvtOpc[HasAVX512][1][Is64Bit] : UCvtOpc[1][Is64Bit];
   } else if (I->getType()->isFloatTy()) {
-    // sitofp int -> float
-    Opcode = CvtOpc[HasAVX512][0][Is64Bit];
+    // s/uitofp int -> float
+    Opcode = IsSigned ? SCvtOpc[HasAVX512][0][Is64Bit] : UCvtOpc[0][Is64Bit];
   } else
     return false;
 
@@ -2458,6 +2466,14 @@ bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
   return true;
 }
 
+bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
+  return X86SelectIntToFP(I, /*IsSigned=*/true);
+}
+
+bool X86FastISel::X86SelectUIToFP(const Instruction *I) {
+  return X86SelectIntToFP(I, /*IsSigned=*/false);
+}
+
 // Helper method used by X86SelectFPExt and X86SelectFPTrunc.
 bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
                                           unsigned TargetOpc,
@@ -3632,6 +3648,8 @@ X86FastISel::fastSelectInstruction(const Instruction *I)  {
     return X86SelectFPTrunc(I);
   case Instruction::SIToFP:
     return X86SelectSIToFP(I);
+  case Instruction::UIToFP:
+    return X86SelectUIToFP(I);
   case Instruction::IntToPtr: // Deliberate fall-through.
   case Instruction::PtrToInt: {
     EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
diff --git a/test/CodeGen/X86/fast-isel-uint-float-conversion-x86-64.ll b/test/CodeGen/X86/fast-isel-uint-float-conversion-x86-64.ll
new file mode 100644
index 00000000000..60d2903ad09
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-uint-float-conversion-x86-64.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+avx512f -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=ALL
+
+
+define double @long_to_double_rr(i64 %a) {
+; ALL-LABEL: long_to_double_rr:
+; ALL:       # %bb.0: # %entry
+; ALL-NEXT:    vcvtusi2sdq %rdi, %xmm0, %xmm0
+; ALL-NEXT:    retq
+entry:
+  %0 = uitofp i64 %a to double
+  ret double %0
+}
+
+define double @long_to_double_rm(i64* %a) {
+; ALL-LABEL: long_to_double_rm:
+; ALL:       # %bb.0: # %entry
+; ALL-NEXT:    movq (%rdi), %rax
+; ALL-NEXT:    vcvtusi2sdq %rax, %xmm0, %xmm0
+; ALL-NEXT:    retq
+entry:
+  %0 = load i64, i64* %a
+  %1 = uitofp i64 %0 to double
+  ret double %1
+}
+
+define double @long_to_double_rm_optsize(i64* %a) optsize {
+; ALL-LABEL: long_to_double_rm_optsize:
+; ALL:       # %bb.0: # %entry
+; ALL-NEXT:    vcvtusi2sdq (%rdi), %xmm0, %xmm0
+; ALL-NEXT:    retq
+entry:
+  %0 = load i64, i64* %a
+  %1 = uitofp i64 %0 to double
+  ret double %1
+}
+
+define float @long_to_float_rr(i64 %a) {
+; ALL-LABEL: long_to_float_rr:
+; ALL:       # %bb.0: # %entry
+; ALL-NEXT:    vcvtusi2ssq %rdi, %xmm0, %xmm0
+; ALL-NEXT:    retq
+entry:
+  %0 = uitofp i64 %a to float
+  ret float %0
+}
+
+define float @long_to_float_rm(i64* %a) {
+; ALL-LABEL: long_to_float_rm:
+; ALL:       # %bb.0: # %entry
+; ALL-NEXT:    movq (%rdi), %rax
+; ALL-NEXT:    vcvtusi2ssq %rax, %xmm0, %xmm0
+; ALL-NEXT:    retq
+entry:
+  %0 = load i64, i64* %a
+  %1 = uitofp i64 %0 to float
+  ret float %1
+}
+
+define float @long_to_float_rm_optsize(i64* %a) optsize {
+; ALL-LABEL: long_to_float_rm_optsize:
+; ALL:       # %bb.0: # %entry
+; ALL-NEXT:    vcvtusi2ssq (%rdi), %xmm0, %xmm0
+; ALL-NEXT:    retq
+entry:
+  %0 = load i64, i64* %a
+  %1 = uitofp i64 %0 to float
+  ret float %1
+}
diff --git a/test/CodeGen/X86/fast-isel-uint-float-conversion.ll b/test/CodeGen/X86/fast-isel-uint-float-conversion.ll
new file mode 100644
index 00000000000..6aad161d406
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-uint-float-conversion.ll
@@ -0,0 +1,160 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+avx512f -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=AVX
+; RUN: llc -verify-machineinstrs -mtriple=i686-unknown-unknown -mcpu=generic -mattr=+avx512f -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=AVX_X86
+
+
+define double @int_to_double_rr(i32 %a) {
+; AVX-LABEL: int_to_double_rr:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvtusi2sdl %edi, %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; AVX_X86-LABEL: int_to_double_rr:
+; AVX_X86:       # %bb.0: # %entry
+; AVX_X86-NEXT:    pushl %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 8
+; AVX_X86-NEXT:    .cfi_offset %ebp, -8
+; AVX_X86-NEXT:    movl %esp, %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa_register %ebp
+; AVX_X86-NEXT:    andl $-8, %esp
+; AVX_X86-NEXT:    subl $8, %esp
+; AVX_X86-NEXT:    movl 8(%ebp), %eax
+; AVX_X86-NEXT:    vcvtusi2sdl %eax, %xmm0, %xmm0
+; AVX_X86-NEXT:    vmovsd %xmm0, (%esp)
+; AVX_X86-NEXT:    fldl (%esp)
+; AVX_X86-NEXT:    movl %ebp, %esp
+; AVX_X86-NEXT:    popl %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa %esp, 4
+; AVX_X86-NEXT:    retl
+entry:
+  %0 = uitofp i32 %a to double
+  ret double %0
+}
+
+define double @int_to_double_rm(i32* %a) {
+; AVX-LABEL: int_to_double_rm:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    movl (%rdi), %eax
+; AVX-NEXT:    vcvtusi2sdl %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; AVX_X86-LABEL: int_to_double_rm:
+; AVX_X86:       # %bb.0: # %entry
+; AVX_X86-NEXT:    pushl %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 8
+; AVX_X86-NEXT:    .cfi_offset %ebp, -8
+; AVX_X86-NEXT:    movl %esp, %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa_register %ebp
+; AVX_X86-NEXT:    andl $-8, %esp
+; AVX_X86-NEXT:    subl $8, %esp
+; AVX_X86-NEXT:    movl 8(%ebp), %eax
+; AVX_X86-NEXT:    vcvtusi2sdl (%eax), %xmm0, %xmm0
+; AVX_X86-NEXT:    vmovsd %xmm0, (%esp)
+; AVX_X86-NEXT:    fldl (%esp)
+; AVX_X86-NEXT:    movl %ebp, %esp
+; AVX_X86-NEXT:    popl %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa %esp, 4
+; AVX_X86-NEXT:    retl
+entry:
+  %0 = load i32, i32* %a
+  %1 = uitofp i32 %0 to double
+  ret double %1
+}
+
+define double @int_to_double_rm_optsize(i32* %a) optsize {
+; AVX-LABEL: int_to_double_rm_optsize:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvtusi2sdl (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; AVX_X86-LABEL: int_to_double_rm_optsize:
+; AVX_X86:       # %bb.0: # %entry
+; AVX_X86-NEXT:    pushl %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 8
+; AVX_X86-NEXT:    .cfi_offset %ebp, -8
+; AVX_X86-NEXT:    movl %esp, %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa_register %ebp
+; AVX_X86-NEXT:    andl $-8, %esp
+; AVX_X86-NEXT:    subl $8, %esp
+; AVX_X86-NEXT:    movl 8(%ebp), %eax
+; AVX_X86-NEXT:    vcvtusi2sdl (%eax), %xmm0, %xmm0
+; AVX_X86-NEXT:    vmovsd %xmm0, (%esp)
+; AVX_X86-NEXT:    fldl (%esp)
+; AVX_X86-NEXT:    movl %ebp, %esp
+; AVX_X86-NEXT:    popl %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa %esp, 4
+; AVX_X86-NEXT:    retl
+entry:
+  %0 = load i32, i32* %a
+  %1 = uitofp i32 %0 to double
+  ret double %1
+}
+
+define float @int_to_float_rr(i32 %a) {
+; AVX-LABEL: int_to_float_rr:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvtusi2ssl %edi, %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; AVX_X86-LABEL: int_to_float_rr:
+; AVX_X86:       # %bb.0: # %entry
+; AVX_X86-NEXT:    pushl %eax
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 8
+; AVX_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; AVX_X86-NEXT:    vcvtusi2ssl %eax, %xmm0, %xmm0
+; AVX_X86-NEXT:    vmovss %xmm0, (%esp)
+; AVX_X86-NEXT:    flds (%esp)
+; AVX_X86-NEXT:    popl %eax
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 4
+; AVX_X86-NEXT:    retl
+entry:
+  %0 = uitofp i32 %a to float
+  ret float %0
+}
+
+define float @int_to_float_rm(i32* %a) {
+; AVX-LABEL: int_to_float_rm:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    movl (%rdi), %eax
+; AVX-NEXT:    vcvtusi2ssl %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; AVX_X86-LABEL: int_to_float_rm:
+; AVX_X86:       # %bb.0: # %entry
+; AVX_X86-NEXT:    pushl %eax
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 8
+; AVX_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; AVX_X86-NEXT:    vcvtusi2ssl (%eax), %xmm0, %xmm0
+; AVX_X86-NEXT:    vmovss %xmm0, (%esp)
+; AVX_X86-NEXT:    flds (%esp)
+; AVX_X86-NEXT:    popl %eax
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 4
+; AVX_X86-NEXT:    retl
+entry:
+  %0 = load i32, i32* %a
+  %1 = uitofp i32 %0 to float
+  ret float %1
+}
+
+define float @int_to_float_rm_optsize(i32* %a) optsize {
+; AVX-LABEL: int_to_float_rm_optsize:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvtusi2ssl (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; AVX_X86-LABEL: int_to_float_rm_optsize:
+; AVX_X86:       # %bb.0: # %entry
+; AVX_X86-NEXT:    pushl %eax
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 8
+; AVX_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; AVX_X86-NEXT:    vcvtusi2ssl (%eax), %xmm0, %xmm0
+; AVX_X86-NEXT:    vmovss %xmm0, (%esp)
+; AVX_X86-NEXT:    flds (%esp)
+; AVX_X86-NEXT:    popl %eax
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 4
+; AVX_X86-NEXT:    retl
+entry:
+  %0 = load i32, i32* %a
+  %1 = uitofp i32 %0 to float
+  ret float %1
+}
-- 
2.50.1