From 7cb6f240317bf85f62b8351d7e7ddbd1852e9113 Mon Sep 17 00:00:00 2001 From: Kang Zhang Date: Mon, 25 Feb 2019 02:46:16 +0000 Subject: [PATCH] [PowerPC] Enhance the fast selection of fptoi & fptrunc instructions and clean up related asserts Summary: Fast selection of the LLVM fptoi & fptrunc instructions does not handle VSX instruction support well. We should use the VSX float-to-integer conversion instructions instead of the non-VSX float-to-integer conversion instructions if the operand register class is VSSRC or VSFRC, because i32 and i64 are mapped to VSSRC and VSFRC respectively if the VSX feature is enabled. For the float truncation instruction, we do similar work to that done for the float-to-integer conversion instructions, to try to use the VSX instruction. Reviewed By: jsji Differential Revision: https://reviews.llvm.org/D58430 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354762 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCFastISel.cpp | 22 ++++++++++++++++++---- test/CodeGen/PowerPC/fast-isel-rsp.ll | 13 +++++++++++++ test/CodeGen/PowerPC/pr26180.ll | 25 +++++++++++++++++++------ 3 files changed, 50 insertions(+), 10 deletions(-) create mode 100644 test/CodeGen/PowerPC/fast-isel-rsp.ll diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 14a72bd9980..e22e682bc57 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -987,12 +987,17 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) { // Round the result to single precision. 
unsigned DestReg; - + auto RC = MRI.getRegClass(SrcReg); if (PPCSubTarget->hasSPE()) { DestReg = createResultReg(&PPC::SPE4RCRegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::EFSCFD), DestReg) .addReg(SrcReg); + } else if (isVSFRCRegClass(RC)) { + DestReg = createResultReg(&PPC::VSSRCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(PPC::XSRSP), DestReg) + .addReg(SrcReg); } else { DestReg = createResultReg(&PPC::F4RCRegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -1207,16 +1212,19 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { if (SrcReg == 0) return false; - // Convert f32 to f64 if necessary. This is just a meaningless copy - // to get the register class right. + // Convert f32 to f64 or convert VSSRC to VSFRC if necessary. This is just a + // meaningless copy to get the register class right. const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg); if (InRC == &PPC::F4RCRegClass) SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg); + else if (InRC == &PPC::VSSRCRegClass) + SrcReg = copyRegToRegClass(&PPC::VSFRCRegClass, SrcReg); // Determine the opcode for the conversion, which takes place - // entirely within FPRs. + // entirely within FPRs or VSRs. unsigned DestReg; unsigned Opc; + auto RC = MRI.getRegClass(SrcReg); if (PPCSubTarget->hasSPE()) { DestReg = createResultReg(&PPC::GPRCRegClass); @@ -1224,6 +1232,12 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ; else Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ; + } else if (isVSFRCRegClass(RC)) { + DestReg = createResultReg(&PPC::VSFRCRegClass); + if (DstVT == MVT::i32) + Opc = IsSigned ? PPC::XSCVDPSXWS : PPC::XSCVDPUXWS; + else + Opc = IsSigned ? 
PPC::XSCVDPSXDS : PPC::XSCVDPUXDS; } else { DestReg = createResultReg(&PPC::F8RCRegClass); if (DstVT == MVT::i32) diff --git a/test/CodeGen/PowerPC/fast-isel-rsp.ll b/test/CodeGen/PowerPC/fast-isel-rsp.ll new file mode 100644 index 00000000000..700e159ac83 --- /dev/null +++ b/test/CodeGen/PowerPC/fast-isel-rsp.ll @@ -0,0 +1,13 @@ +; RUN: llc -mcpu=generic -mtriple=powerpc64le-unknown-unknown -O0 < %s \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=GENERIC +; RUN: llc -mcpu=ppc -mtriple=powerpc64le-unknown-unknown -O0 < %s \ +; RUN: -verify-machineinstrs | FileCheck %s + +define float @testRSP(double %x) { +entry: + %0 = fptrunc double %x to float + ret float %0 +; CHECK: frsp 1, 1 +; GENERIC: xsrsp 1, 1 +} + diff --git a/test/CodeGen/PowerPC/pr26180.ll b/test/CodeGen/PowerPC/pr26180.ll index d4b05dfeed6..e8c6c60061c 100644 --- a/test/CodeGen/PowerPC/pr26180.ll +++ b/test/CodeGen/PowerPC/pr26180.ll @@ -1,14 +1,27 @@ -; RUN: llc -mcpu=generic -mtriple=powerpc64le-unknown-unknown -O0 < %s | FileCheck %s --check-prefix=GENERIC -; RUN: llc -mcpu=ppc -mtriple=powerpc64le-unknown-unknown -O0 < %s | FileCheck %s +; RUN: llc -mcpu=generic -mtriple=powerpc64le-unknown-unknown -O0 < %s \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=GENERIC +; RUN: llc -mcpu=ppc -mtriple=powerpc64le-unknown-unknown -O0 < %s \ +; RUN: -verify-machineinstrs | FileCheck %s define i32 @bad(double %x) { %1 = fptoui double %x to i32 ret i32 %1 -} ; CHECK: fctidz [[REG0:[0-9]+]], 1 ; CHECK: stfd [[REG0]], [[OFF:.*]](1) ; CHECK: lwz {{[0-9]*}}, [[OFF]](1) -; GENERIC: fctiwuz [[REG0:[0-9]+]], 1 -; GENERIC: stfd [[REG0]], [[OFF:.*]](1) -; GENERIC: lwz {{[0-9]*}}, [[OFF]](1) +; GENERIC: xscvdpuxws [[REG0:[0-9]+]], 1 +; GENERIC: mfvsrwz {{[0-9]*}}, [[REG0]] +} + +define i32 @bad1(float %x) { +entry: + %0 = fptosi float %x to i32 + ret i32 %0 + +; CHECK: fctiwz [[REG0:[0-9]+]], 1 +; CHECK: stfd [[REG0]], [[OFF:.*]](1) +; CHECK: lwa {{[0-9]*}}, [[OFF]](1) +; GENERIC: xscvdpsxws 
[[REG0:[0-9]+]], 1 +; GENERIC: mfvsrwz {{[0-9]*}}, [[REG0]] +} -- 2.40.0