From: Craig Topper Date: Thu, 14 Feb 2019 01:41:43 +0000 (+0000) Subject: [X86] Make (i16 (fp_to_sint (f80))) use fistps/fisttps instead of fistpl/fisttpl... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=afc36095627b820e5b6804c6c98aa459bd7837f2;p=llvm [X86] Make (i16 (fp_to_sint (f80))) use fistps/fisttps instead of fistpl/fisttpl when SSE is enabled. When SSE is enabled, fp_to_sint with an i16 result is blindly promoted to i32, but that changes the behavior of the f80 conversion. Move the promotion of i16 to i32 into LowerFP_TO_INT so we can limit it based on the floating point type. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354003 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ac4bce0e454..680aa438a84 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -257,14 +257,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom); setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom); - if (X86ScalarSSEf32) { - setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); - // f32 and f64 cases are Legal, f80 case is not - setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); - } else { - setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom); - setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); - } + setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom); + setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); } else { setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand); @@ -18698,12 +18692,12 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT; MVT VT = Op.getSimpleValueType(); + SDValue Src = Op.getOperand(0); + MVT SrcVT = 
Src.getSimpleValueType(); + SDLoc dl(Op); if (VT.isVector()) { - SDValue Src = Op.getOperand(0); - SDLoc dl(Op); - - if (VT == MVT::v2i1 && Src.getSimpleValueType() == MVT::v2f64) { + if (VT == MVT::v2i1 && SrcVT == MVT::v2f64) { MVT ResVT = MVT::v4i32; MVT TruncVT = MVT::v4i1; unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; @@ -18723,7 +18717,7 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { } assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!"); - if (VT == MVT::v2i64 && Src.getSimpleValueType() == MVT::v2f32) { + if (VT == MVT::v2i64 && SrcVT == MVT::v2f32) { return DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI, dl, VT, DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, DAG.getUNDEF(MVT::v2f32))); @@ -18735,9 +18729,8 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { assert(!VT.isVector()); if (!IsSigned && Subtarget.hasAVX512()) { - SDValue Src = Op.getOperand(0); // Conversions from f32/f64 should be legal. - if (Src.getValueType() != MVT::f80) + if (SrcVT != MVT::f80) return Op; // Use default expansion. @@ -18745,6 +18738,13 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } + // Promote i16 to i32 if we can use a SSE operation. 
+ if (VT == MVT::i16 && isScalarFPTypeInSSEReg(SrcVT)) { + assert(IsSigned && "Expected i16 FP_TO_UINT to have been promoted!"); + SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Res); + } + if (SDValue V = FP_TO_INTHelper(Op, DAG, IsSigned)) return V; diff --git a/test/CodeGen/X86/fp-cvt.ll b/test/CodeGen/X86/fp-cvt.ll index 92bff0333be..b087e759c70 100644 --- a/test/CodeGen/X86/fp-cvt.ll +++ b/test/CodeGen/X86/fp-cvt.ll @@ -31,18 +31,16 @@ define i16 @fptosi_i16_fp80(x86_fp80 %a0) nounwind { ; X64-X87-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F ; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp) ; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: fistpl -{{[0-9]+}}(%rsp) +; X64-X87-NEXT: fistps -{{[0-9]+}}(%rsp) ; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: movl -{{[0-9]+}}(%rsp), %eax -; X64-X87-NEXT: # kill: def $ax killed $ax killed $eax +; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax ; X64-X87-NEXT: retq ; ; X64-SSSE3-LABEL: fptosi_i16_fp80: ; X64-SSSE3: # %bb.0: ; X64-SSSE3-NEXT: fldt {{[0-9]+}}(%rsp) -; X64-SSSE3-NEXT: fisttpl -{{[0-9]+}}(%rsp) -; X64-SSSE3-NEXT: movl -{{[0-9]+}}(%rsp), %eax -; X64-SSSE3-NEXT: # kill: def $ax killed $ax killed $eax +; X64-SSSE3-NEXT: fisttps -{{[0-9]+}}(%rsp) +; X64-SSSE3-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax ; X64-SSSE3-NEXT: retq %1 = fptosi x86_fp80 %a0 to i16 ret i16 %1 @@ -73,18 +71,16 @@ define i16 @fptosi_i16_fp80_ld(x86_fp80 *%a0) nounwind { ; X64-X87-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F ; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp) ; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: fistpl -{{[0-9]+}}(%rsp) +; X64-X87-NEXT: fistps -{{[0-9]+}}(%rsp) ; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: movl -{{[0-9]+}}(%rsp), %eax -; X64-X87-NEXT: # kill: def $ax killed $ax killed $eax +; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax ; X64-X87-NEXT: retq ; ; X64-SSSE3-LABEL: fptosi_i16_fp80_ld: ; X64-SSSE3: # %bb.0: ; 
X64-SSSE3-NEXT: fldt (%rdi) -; X64-SSSE3-NEXT: fisttpl -{{[0-9]+}}(%rsp) -; X64-SSSE3-NEXT: movl -{{[0-9]+}}(%rsp), %eax -; X64-SSSE3-NEXT: # kill: def $ax killed $ax killed $eax +; X64-SSSE3-NEXT: fisttps -{{[0-9]+}}(%rsp) +; X64-SSSE3-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax ; X64-SSSE3-NEXT: retq %1 = load x86_fp80, x86_fp80 *%a0 %2 = fptosi x86_fp80 %1 to i16