From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sat, 7 Jan 2017 21:33:00 +0000 (+0000)
Subject: [CostModel][X86] Generalized cost calculation of SHL by constant -> MUL conversion.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e1ddc8e7d2668302343946a6bf70f011a446942e;p=llvm

[CostModel][X86] Generalized cost calculation of SHL by constant -> MUL conversion.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291364 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index e2401d4fe86..29cd8edc46f 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -409,21 +409,9 @@ int X86TTIImpl::getArithmeticInstrCost(
       Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) {
     MVT VT = LT.second;
     // Vector shift left by non uniform constant can be lowered
-    // into vector multiply (pmullw/pmulld).
-    if ((VT == MVT::v8i16 && ST->hasSSE2()) ||
-        (VT == MVT::v4i32 && ST->hasSSE41()))
-      return LT.first;
-
-    // v16i16 and v8i32 shifts by non-uniform constants are lowered into a
-    // sequence of extract + two vector multiply + insert.
-    if ((VT == MVT::v8i32 || VT == MVT::v16i16) &&
-       (ST->hasAVX() && !ST->hasAVX2()))
-      ISD = ISD::MUL;
-
-    // A vector shift left by non uniform constant is converted
-    // into a vector multiply; the new multiply is eventually
-    // lowered into a sequence of shuffles and 2 x pmuludq.
-    if (VT == MVT::v4i32 && ST->hasSSE2())
+    // into vector multiply.
+    if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->hasSSE2()) ||
+        ((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->hasAVX()))
       ISD = ISD::MUL;
   }
 
@@ -534,6 +522,7 @@ int X86TTIImpl::getArithmeticInstrCost(
     { ISD::SRA,  MVT::v4i64,  2*12 }, // srl/xor/sub sequence.
 
     { ISD::MUL,  MVT::v16i8,    12 }, // extend/pmullw/trunc sequence.
+    { ISD::MUL,  MVT::v8i16,     1 }, // pmullw
     { ISD::MUL,  MVT::v4i32,     6 }, // 3*pmuludq/4*shuffle
     { ISD::MUL,  MVT::v2i64,     8 }, // 3*pmuludq/3*shift/2*add
 
@@ -549,13 +538,13 @@ int X86TTIImpl::getArithmeticInstrCost(
     // generally a bad idea. Assume somewhat arbitrarily that we have to be able
     // to hide "20 cycles" for each lane.
     { ISD::SDIV,  MVT::v16i8,  16*20 },
-    { ISD::SDIV,  MVT::v8i16,  8*20 },
-    { ISD::SDIV,  MVT::v4i32,  4*20 },
-    { ISD::SDIV,  MVT::v2i64,  2*20 },
+    { ISD::SDIV,  MVT::v8i16,   8*20 },
+    { ISD::SDIV,  MVT::v4i32,   4*20 },
+    { ISD::SDIV,  MVT::v2i64,   2*20 },
     { ISD::UDIV,  MVT::v16i8,  16*20 },
-    { ISD::UDIV,  MVT::v8i16,  8*20 },
-    { ISD::UDIV,  MVT::v4i32,  4*20 },
-    { ISD::UDIV,  MVT::v2i64,  2*20 },
+    { ISD::UDIV,  MVT::v8i16,   8*20 },
+    { ISD::UDIV,  MVT::v4i32,   4*20 },
+    { ISD::UDIV,  MVT::v2i64,   2*20 },
   };
 
   if (ST->hasSSE2())