From 9724d35716802bd6d533ff366cf23551a6fe76ce Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 7 Jan 2017 17:03:51 +0000 Subject: [PATCH] [CostModel][X86] Reordered AVX1 arithmetic cost LUT into descending target order. NFCI. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291352 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86TargetTransformInfo.cpp | 54 +++++++++++------------ 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 9167c09b43f..7c3739719bc 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -440,6 +440,33 @@ int X86TTIImpl::getArithmeticInstrCost( ISD = ISD::MUL; } + static const CostTblEntry AVX1CostTable[] = { + // We don't have to scalarize unsupported ops. We can issue two half-sized + // operations and we only need to extract the upper YMM half. + // Two ops + 1 extract + 1 insert = 4. + { ISD::MUL, MVT::v16i16, 4 }, + { ISD::MUL, MVT::v8i32, 4 }, + { ISD::SUB, MVT::v32i8, 4 }, + { ISD::ADD, MVT::v32i8, 4 }, + { ISD::SUB, MVT::v16i16, 4 }, + { ISD::ADD, MVT::v16i16, 4 }, + { ISD::SUB, MVT::v8i32, 4 }, + { ISD::ADD, MVT::v8i32, 4 }, + { ISD::SUB, MVT::v4i64, 4 }, + { ISD::ADD, MVT::v4i64, 4 }, + + // A v4i64 multiply is custom lowered as two split v2i64 vectors that then + // are lowered as a series of long multiplies(3), shifts(3) and adds(2) + // Because we believe v4i64 to be a legal type, we must also include the + // extract+insert in the cost table. Therefore, the cost here is 18 + // instead of 8. + { ISD::MUL, MVT::v4i64, 18 }, + }; + + if (ST->hasAVX() && !ST->hasAVX2()) + if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, LT.second)) + return LT.first * Entry->Cost; + static const CostTblEntry SSE42CostTable[] = { { ISD::FDIV, MVT::f32, 14 }, // Nehalem from http://www.agner.org/ { ISD::FDIV, MVT::v4f32, 14 }, // Nehalem from http://www.agner.org/ @@ -529,33 +556,6 @@ int X86TTIImpl::getArithmeticInstrCost( if (const auto *Entry = CostTableLookup(SSE2CostTable, ISD, LT.second)) return LT.first * Entry->Cost; - static const CostTblEntry AVX1CostTable[] = { - // We don't have to scalarize unsupported ops. We can issue two half-sized - // operations and we only need to extract the upper YMM half. - // Two ops + 1 extract + 1 insert = 4. - { ISD::MUL, MVT::v16i16, 4 }, - { ISD::MUL, MVT::v8i32, 4 }, - { ISD::SUB, MVT::v32i8, 4 }, - { ISD::ADD, MVT::v32i8, 4 }, - { ISD::SUB, MVT::v16i16, 4 }, - { ISD::ADD, MVT::v16i16, 4 }, - { ISD::SUB, MVT::v8i32, 4 }, - { ISD::ADD, MVT::v8i32, 4 }, - { ISD::SUB, MVT::v4i64, 4 }, - { ISD::ADD, MVT::v4i64, 4 }, - // A v4i64 multiply is custom lowered as two split v2i64 vectors that then - // are lowered as a series of long multiplies(3), shifts(3) and adds(2) - // Because we believe v4i64 to be a legal type, we must also include the - // extract+insert in the cost table. Therefore, the cost here is 18 - // instead of 8. - { ISD::MUL, MVT::v4i64, 18 }, - }; - - // Look for AVX1 lowering tricks. - if (ST->hasAVX() && !ST->hasAVX2()) - if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, LT.second)) - return LT.first * Entry->Cost; - static const CostTblEntry SSE1CostTable[] = { { ISD::FDIV, MVT::f32, 17 }, // Pentium III from http://www.agner.org/ { ISD::FDIV, MVT::v4f32, 34 }, // Pentium III from http://www.agner.org/ -- 2.40.0