From: Craig Topper Date: Thu, 22 Aug 2019 08:18:45 +0000 (+0000) Subject: [X86] Lower the cost of v2i32->v2f64 sint_to_fp under vector widening legalization. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3d187fdcb77ee4b345871a01fdb59cfb7419d76c;p=llvm [X86] Lower the cost of v2i32->v2f64 sint_to_fp under vector widening legalization. I don't really understand the costs we're using for fp_to_sint, but prior to widening legalization we used 20 as the cost for this via the v2i64->v2f64 entry. That number seems better than the 40 we got with widening legalization. So now we need either a v2i32->v2f64 entry or a v4i32->v2f64 entry depending on whether AVX is enabled or not since we skip the first SSE2 table look up under AVX. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@369628 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 1181181a3c5..1e8f1b31214 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1568,6 +1568,11 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 4 }, }; + static const TypeConversionCostTblEntry SSE2ConversionTblWide[] = { + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 2*10 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2*10 }, + }; + static const TypeConversionCostTblEntry SSE2ConversionTbl[] = { // These are somewhat magic numbers justified by looking at the output of // Intel's IACA, running some kernels and making sure when we take @@ -1633,6 +1638,13 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, std::pair LTSrc = TLI->getTypeLegalizationCost(DL, Src); std::pair LTDest = TLI->getTypeLegalizationCost(DL, Dst); + if (ST->hasSSE2() && !ST->hasAVX() && + ExperimentalVectorWideningLegalization) { + if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTblWide, ISD, + LTDest.second, LTSrc.second)) + return LTSrc.first * Entry->Cost; + } + if (ST->hasSSE2() && !ST->hasAVX()) { if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD, LTDest.second, LTSrc.second)) @@ -1705,6 +1717,12 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, return Entry->Cost; } + if (ST->hasSSE2() && ExperimentalVectorWideningLegalization) { + if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTblWide, ISD, + SimpleDstTy, SimpleSrcTy)) + return Entry->Cost; + } + if (ST->hasSSE2()) { if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) diff --git a/test/Analysis/CostModel/X86/cast.ll b/test/Analysis/CostModel/X86/cast.ll index 4a52a01b652..c3e1f624cdc 100644 --- a/test/Analysis/CostModel/X86/cast.ll +++ b/test/Analysis/CostModel/X86/cast.ll @@ -314,13 +314,13 @@ define i32 @masks4(<4 x i1> %in) { define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { ; SSE-LABEL: 'sitofp4' ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'sitofp4' diff --git a/test/Analysis/CostModel/X86/sitofp.ll b/test/Analysis/CostModel/X86/sitofp.ll index 493b24e93b9..8ac8050da39 100644 --- a/test/Analysis/CostModel/X86/sitofp.ll +++ b/test/Analysis/CostModel/X86/sitofp.ll @@ -85,28 +85,28 @@ define i32 @sitofp_i16_double() { define i32 @sitofp_i32_double() { ; SSE-LABEL: 'sitofp_i32_double' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'sitofp_i32_double' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'sitofp_i32_double' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> +; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'sitofp_i32_double' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef