From: Simon Pilgrim Date: Sat, 7 Jan 2017 21:33:00 +0000 (+0000) Subject: [CostModel][X86] Generalized cost calculation of SHL by constant -> MUL conversion. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e1ddc8e7d2668302343946a6bf70f011a446942e;p=llvm [CostModel][X86] Generalized cost calculation of SHL by constant -> MUL conversion. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291364 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index e2401d4fe86..29cd8edc46f 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -409,21 +409,9 @@ int X86TTIImpl::getArithmeticInstrCost( Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) { MVT VT = LT.second; // Vector shift left by non uniform constant can be lowered - // into vector multiply (pmullw/pmulld). - if ((VT == MVT::v8i16 && ST->hasSSE2()) || - (VT == MVT::v4i32 && ST->hasSSE41())) - return LT.first; - - // v16i16 and v8i32 shifts by non-uniform constants are lowered into a - // sequence of extract + two vector multiply + insert. - if ((VT == MVT::v8i32 || VT == MVT::v16i16) && - (ST->hasAVX() && !ST->hasAVX2())) - ISD = ISD::MUL; - - // A vector shift left by non uniform constant is converted - // into a vector multiply; the new multiply is eventually - // lowered into a sequence of shuffles and 2 x pmuludq. - if (VT == MVT::v4i32 && ST->hasSSE2()) + // into vector multiply. + if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->hasSSE2()) || + ((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->hasAVX())) ISD = ISD::MUL; } @@ -534,6 +522,7 @@ int X86TTIImpl::getArithmeticInstrCost( { ISD::SRA, MVT::v4i64, 2*12 }, // srl/xor/sub sequence. { ISD::MUL, MVT::v16i8, 12 }, // extend/pmullw/trunc sequence. + { ISD::MUL, MVT::v8i16, 1 }, // pmullw { ISD::MUL, MVT::v4i32, 6 }, // 3*pmuludq/4*shuffle { ISD::MUL, MVT::v2i64, 8 }, // 3*pmuludq/3*shift/2*add @@ -549,13 +538,13 @@ int X86TTIImpl::getArithmeticInstrCost( // generally a bad idea. Assume somewhat arbitrarily that we have to be able // to hide "20 cycles" for each lane. { ISD::SDIV, MVT::v16i8, 16*20 }, - { ISD::SDIV, MVT::v8i16, 8*20 }, - { ISD::SDIV, MVT::v4i32, 4*20 }, - { ISD::SDIV, MVT::v2i64, 2*20 }, + { ISD::SDIV, MVT::v8i16, 8*20 }, + { ISD::SDIV, MVT::v4i32, 4*20 }, + { ISD::SDIV, MVT::v2i64, 2*20 }, { ISD::UDIV, MVT::v16i8, 16*20 }, - { ISD::UDIV, MVT::v8i16, 8*20 }, - { ISD::UDIV, MVT::v4i32, 4*20 }, - { ISD::UDIV, MVT::v2i64, 2*20 }, + { ISD::UDIV, MVT::v8i16, 8*20 }, + { ISD::UDIV, MVT::v4i32, 4*20 }, + { ISD::UDIV, MVT::v2i64, 2*20 }, }; if (ST->hasSSE2())