From f173f479c2e5b75a94a2f6706d745732bc68ad6e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 13 Jul 2018 22:09:30 +0000 Subject: [PATCH] [X86][FastISel] Support uitofp with avx512. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@337055 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 34 +++- .../fast-isel-uint-float-conversion-x86-64.ll | 69 ++++++++ .../X86/fast-isel-uint-float-conversion.ll | 160 ++++++++++++++++++ 3 files changed, 255 insertions(+), 8 deletions(-) create mode 100644 test/CodeGen/X86/fast-isel-uint-float-conversion-x86-64.ll create mode 100644 test/CodeGen/X86/fast-isel-uint-float-conversion.ll diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index acbf01b3318..de8b40f28a8 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -134,6 +134,8 @@ private: bool X86SelectFPExt(const Instruction *I); bool X86SelectFPTrunc(const Instruction *I); bool X86SelectSIToFP(const Instruction *I); + bool X86SelectUIToFP(const Instruction *I); + bool X86SelectIntToFP(const Instruction *I, bool IsSigned); const X86InstrInfo *getInstrInfo() const { return Subtarget->getInstrInfo(); @@ -2410,11 +2412,14 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) { return false; } -bool X86FastISel::X86SelectSIToFP(const Instruction *I) { +// Common code for X86SelectSIToFP and X86SelectUIToFP. +bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) { // The target-independent selection algorithm in FastISel already knows how // to select a SINT_TO_FP if the target is SSE but not AVX. // Early exit if the subtarget doesn't have AVX. - if (!Subtarget->hasAVX()) + // Unsigned conversion requires avx512. + bool HasAVX512 = Subtarget->hasAVX512(); + if (!Subtarget->hasAVX() || (!IsSigned && !HasAVX512)) return false; // TODO: We could sign extend narrower types. @@ -2429,21 +2434,24 @@ bool X86FastISel::X86SelectSIToFP(const Instruction *I) { unsigned Opcode; - static const uint16_t CvtOpc[2][2][2] = { + static const uint16_t SCvtOpc[2][2][2] = { { { X86::VCVTSI2SSrr, X86::VCVTSI642SSrr }, { X86::VCVTSI2SDrr, X86::VCVTSI642SDrr } }, { { X86::VCVTSI2SSZrr, X86::VCVTSI642SSZrr }, { X86::VCVTSI2SDZrr, X86::VCVTSI642SDZrr } }, }; - bool HasAVX512 = Subtarget->hasAVX512(); + static const uint16_t UCvtOpc[2][2] = { + { X86::VCVTUSI2SSZrr, X86::VCVTUSI642SSZrr }, + { X86::VCVTUSI2SDZrr, X86::VCVTUSI642SDZrr }, + }; bool Is64Bit = SrcVT == MVT::i64; if (I->getType()->isDoubleTy()) { - // sitofp int -> double - Opcode = CvtOpc[HasAVX512][1][Is64Bit]; + // s/uitofp int -> double + Opcode = IsSigned ? SCvtOpc[HasAVX512][1][Is64Bit] : UCvtOpc[1][Is64Bit]; } else if (I->getType()->isFloatTy()) { - // sitofp int -> float - Opcode = CvtOpc[HasAVX512][0][Is64Bit]; + // s/uitofp int -> float + Opcode = IsSigned ? SCvtOpc[HasAVX512][0][Is64Bit] : UCvtOpc[0][Is64Bit]; } else return false; @@ -2458,6 +2466,14 @@ bool X86FastISel::X86SelectSIToFP(const Instruction *I) { return true; } +bool X86FastISel::X86SelectSIToFP(const Instruction *I) { + return X86SelectIntToFP(I, /*IsSigned*/true); +} + +bool X86FastISel::X86SelectUIToFP(const Instruction *I) { + return X86SelectIntToFP(I, /*IsSigned*/false); +} + // Helper method used by X86SelectFPExt and X86SelectFPTrunc. bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned TargetOpc, @@ -3632,6 +3648,8 @@ X86FastISel::fastSelectInstruction(const Instruction *I) { return X86SelectFPTrunc(I); case Instruction::SIToFP: return X86SelectSIToFP(I); + case Instruction::UIToFP: + return X86SelectUIToFP(I); case Instruction::IntToPtr: // Deliberate fall-through. case Instruction::PtrToInt: { EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType()); diff --git a/test/CodeGen/X86/fast-isel-uint-float-conversion-x86-64.ll b/test/CodeGen/X86/fast-isel-uint-float-conversion-x86-64.ll new file mode 100644 index 00000000000..60d2903ad09 --- /dev/null +++ b/test/CodeGen/X86/fast-isel-uint-float-conversion-x86-64.ll @@ -0,0 +1,69 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+avx512f -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX + + +define double @long_to_double_rr(i64 %a) { +; ALL-LABEL: long_to_double_rr: +; ALL: # %bb.0: # %entry +; ALL-NEXT: vcvtusi2sdq %rdi, %xmm0, %xmm0 +; ALL-NEXT: retq +entry: + %0 = uitofp i64 %a to double + ret double %0 +} + +define double @long_to_double_rm(i64* %a) { +; ALL-LABEL: long_to_double_rm: +; ALL: # %bb.0: # %entry +; ALL-NEXT: movq (%rdi), %rax +; ALL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm0 +; ALL-NEXT: retq +entry: + %0 = load i64, i64* %a + %1 = uitofp i64 %0 to double + ret double %1 +} + +define double @long_to_double_rm_optsize(i64* %a) optsize { +; ALL-LABEL: long_to_double_rm_optsize: +; ALL: # %bb.0: # %entry +; ALL-NEXT: vcvtusi2sdq (%rdi), %xmm0, %xmm0 +; ALL-NEXT: retq +entry: + %0 = load i64, i64* %a + %1 = uitofp i64 %0 to double + ret double %1 +} + +define float @long_to_float_rr(i64 %a) { +; ALL-LABEL: long_to_float_rr: +; ALL: # %bb.0: # %entry +; ALL-NEXT: vcvtusi2ssq %rdi, %xmm0, %xmm0 +; ALL-NEXT: retq +entry: + %0 = uitofp i64 %a to float + ret float %0 +} + +define float @long_to_float_rm(i64* %a) { +; ALL-LABEL: long_to_float_rm: +; ALL: # %bb.0: # %entry +; ALL-NEXT: movq (%rdi), %rax +; ALL-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm0 +; ALL-NEXT: retq +entry: + %0 = load i64, i64* %a + %1 = uitofp i64 %0 to float + ret float %1 +} + +define float @long_to_float_rm_optsize(i64* %a) optsize { +; ALL-LABEL: long_to_float_rm_optsize: +; ALL: # %bb.0: # %entry +; ALL-NEXT: vcvtusi2ssq (%rdi), %xmm0, %xmm0 +; ALL-NEXT: retq +entry: + %0 = load i64, i64* %a + %1 = uitofp i64 %0 to float + ret float %1 +} diff --git a/test/CodeGen/X86/fast-isel-uint-float-conversion.ll b/test/CodeGen/X86/fast-isel-uint-float-conversion.ll new file mode 100644 index 00000000000..6aad161d406 --- /dev/null +++ b/test/CodeGen/X86/fast-isel-uint-float-conversion.ll @@ -0,0 +1,160 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+avx512f -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=AVX +; RUN: llc -verify-machineinstrs -mtriple=i686-unknown-unknown -mcpu=generic -mattr=+avx512f -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=AVX_X86 + + +define double @int_to_double_rr(i32 %a) { +; AVX-LABEL: int_to_double_rr: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX_X86-LABEL: int_to_double_rr: +; AVX_X86: # %bb.0: # %entry +; AVX_X86-NEXT: pushl %ebp +; AVX_X86-NEXT: .cfi_def_cfa_offset 8 +; AVX_X86-NEXT: .cfi_offset %ebp, -8 +; AVX_X86-NEXT: movl %esp, %ebp +; AVX_X86-NEXT: .cfi_def_cfa_register %ebp +; AVX_X86-NEXT: andl $-8, %esp +; AVX_X86-NEXT: subl $8, %esp +; AVX_X86-NEXT: movl 8(%ebp), %eax +; AVX_X86-NEXT: vcvtusi2sdl %eax, %xmm0, %xmm0 +; AVX_X86-NEXT: vmovsd %xmm0, (%esp) +; AVX_X86-NEXT: fldl (%esp) +; AVX_X86-NEXT: movl %ebp, %esp +; AVX_X86-NEXT: popl %ebp +; AVX_X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX_X86-NEXT: retl +entry: + %0 = uitofp i32 %a to double + ret double %0 +} + +define double @int_to_double_rm(i32* %a) { +; AVX-LABEL: int_to_double_rm: +; AVX: # %bb.0: # %entry +; AVX-NEXT: movl (%rdi), %eax +; AVX-NEXT: vcvtusi2sdl %eax, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX_X86-LABEL: int_to_double_rm: +; AVX_X86: # %bb.0: # %entry +; AVX_X86-NEXT: pushl %ebp +; AVX_X86-NEXT: .cfi_def_cfa_offset 8 +; AVX_X86-NEXT: .cfi_offset %ebp, -8 +; AVX_X86-NEXT: movl %esp, %ebp +; AVX_X86-NEXT: .cfi_def_cfa_register %ebp +; AVX_X86-NEXT: andl $-8, %esp +; AVX_X86-NEXT: subl $8, %esp +; AVX_X86-NEXT: movl 8(%ebp), %eax +; AVX_X86-NEXT: vcvtusi2sdl (%eax), %xmm0, %xmm0 +; AVX_X86-NEXT: vmovsd %xmm0, (%esp) +; AVX_X86-NEXT: fldl (%esp) +; AVX_X86-NEXT: movl %ebp, %esp +; AVX_X86-NEXT: popl %ebp +; AVX_X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX_X86-NEXT: retl +entry: + %0 = load i32, i32* %a + %1 = uitofp i32 %0 to double + ret double %1 +} + +define double @int_to_double_rm_optsize(i32* %a) optsize { +; AVX-LABEL: int_to_double_rm_optsize: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvtusi2sdl (%rdi), %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX_X86-LABEL: int_to_double_rm_optsize: +; AVX_X86: # %bb.0: # %entry +; AVX_X86-NEXT: pushl %ebp +; AVX_X86-NEXT: .cfi_def_cfa_offset 8 +; AVX_X86-NEXT: .cfi_offset %ebp, -8 +; AVX_X86-NEXT: movl %esp, %ebp +; AVX_X86-NEXT: .cfi_def_cfa_register %ebp +; AVX_X86-NEXT: andl $-8, %esp +; AVX_X86-NEXT: subl $8, %esp +; AVX_X86-NEXT: movl 8(%ebp), %eax +; AVX_X86-NEXT: vcvtusi2sdl (%eax), %xmm0, %xmm0 +; AVX_X86-NEXT: vmovsd %xmm0, (%esp) +; AVX_X86-NEXT: fldl (%esp) +; AVX_X86-NEXT: movl %ebp, %esp +; AVX_X86-NEXT: popl %ebp +; AVX_X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX_X86-NEXT: retl +entry: + %0 = load i32, i32* %a + %1 = uitofp i32 %0 to double + ret double %1 +} + +define float @int_to_float_rr(i32 %a) { +; AVX-LABEL: int_to_float_rr: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX_X86-LABEL: int_to_float_rr: +; AVX_X86: # %bb.0: # %entry +; AVX_X86-NEXT: pushl %eax +; AVX_X86-NEXT: .cfi_def_cfa_offset 8 +; AVX_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX_X86-NEXT: vcvtusi2ssl %eax, %xmm0, %xmm0 +; AVX_X86-NEXT: vmovss %xmm0, (%esp) +; AVX_X86-NEXT: flds (%esp) +; AVX_X86-NEXT: popl %eax +; AVX_X86-NEXT: .cfi_def_cfa_offset 4 +; AVX_X86-NEXT: retl +entry: + %0 = uitofp i32 %a to float + ret float %0 +} + +define float @int_to_float_rm(i32* %a) { +; AVX-LABEL: int_to_float_rm: +; AVX: # %bb.0: # %entry +; AVX-NEXT: movl (%rdi), %eax +; AVX-NEXT: vcvtusi2ssl %eax, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX_X86-LABEL: int_to_float_rm: +; AVX_X86: # %bb.0: # %entry +; AVX_X86-NEXT: pushl %eax +; AVX_X86-NEXT: .cfi_def_cfa_offset 8 +; AVX_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX_X86-NEXT: vcvtusi2ssl (%eax), %xmm0, %xmm0 +; AVX_X86-NEXT: vmovss %xmm0, (%esp) +; AVX_X86-NEXT: flds (%esp) +; AVX_X86-NEXT: popl %eax +; AVX_X86-NEXT: .cfi_def_cfa_offset 4 +; AVX_X86-NEXT: retl +entry: + %0 = load i32, i32* %a + %1 = uitofp i32 %0 to float + ret float %1 +} + +define float @int_to_float_rm_optsize(i32* %a) optsize { +; AVX-LABEL: int_to_float_rm_optsize: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvtusi2ssl (%rdi), %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX_X86-LABEL: int_to_float_rm_optsize: +; AVX_X86: # %bb.0: # %entry +; AVX_X86-NEXT: pushl %eax +; AVX_X86-NEXT: .cfi_def_cfa_offset 8 +; AVX_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX_X86-NEXT: vcvtusi2ssl (%eax), %xmm0, %xmm0 +; AVX_X86-NEXT: vmovss %xmm0, (%esp) +; AVX_X86-NEXT: flds (%esp) +; AVX_X86-NEXT: popl %eax +; AVX_X86-NEXT: .cfi_def_cfa_offset 4 +; AVX_X86-NEXT: retl +entry: + %0 = load i32, i32* %a + %1 = uitofp i32 %0 to float + ret float %1 +} -- 2.50.1