// If we are converting vectors and the operation is illegal, or
// if the vectors are legalized to different types, estimate the
// scalarization costs.
+ // TODO: This is probably a big overestimate. For splits, we should have
+ // something like getTypeLegalizationCost() + 2 * getCastInstrCost().
+ // The same applies to getCmpSelInstrCost() and getArithmeticInstrCost().
unsigned Num = Dst->getVectorNumElements();
unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
    Opcode, Dst->getScalarType(), Src->getScalarType());

// Return the cost of multiple scalar invocations plus the cost of
// inserting and extracting the values.
return getScalarizationOverhead(Dst, true, true) + Num * Cost;
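For illustration, the split estimate the TODO has in mind might look roughly like the sketch below. This is hypothetical code, not part of the patch; the getTypeLegalizationCost() and VectorType::get() usage is an assumption about the surrounding BasicTTIImpl context:

// Hypothetical sketch of the TODO's split estimate: when legalization splits
// the vectors in half, charge the legalization overhead plus one cast per
// half instead of scalarizing every element.
std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Dst);
if (LT.first == 2) { // The type is split exactly once.
  Type *HalfDst = VectorType::get(Dst->getScalarType(), Num / 2);
  Type *HalfSrc = VectorType::get(Src->getScalarType(), Num / 2);
  return LT.first +
         2 * static_cast<T *>(this)->getCastInstrCost(Opcode, HalfDst,
                                                      HalfSrc);
}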
};
static const TypeConversionCostTblEntry SSE41ConversionTbl[] = {
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 2 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 2 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 2 },
+
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4 },
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
};

static const TypeConversionCostTblEntry SSE2ConversionTbl[] = {
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 3 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 5 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 10 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 },
+
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 6 },
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 8 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 3 },
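For context, X86TTIImpl consults these tables after legalizing the source and destination types. A simplified sketch of the lookup follows; ISD, LTSrc, and LTDest stand for the legalized opcode and type pairs computed earlier in getCastInstrCost, and the lookup helper's exact signature has varied across LLVM versions, so treat this as illustrative:

// Illustrative lookup: match (opcode, legalized dst type, legalized src type)
// against the subtarget's table and return the tabulated cost on a hit.
if (ST->hasSSE41())
  if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD,
                                                 LTDest.second, LTSrc.second))
    return Entry->Cost;

The companion test below runs the cost-model analysis under both -mattr=+sse2 and -mattr=+sse4.1 and checks the expected per-instruction cost printed for each cast.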
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse4.1 -cost-model -analyze < %s | FileCheck --check-prefix=SSE41 %s
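Each CHECK line matches the analysis output, which reports one line per instruction. For the SSE4.1 run of the first function below, the matched line should look roughly like this (exact formatting varies between LLVM versions):

Cost Model: Found an estimated cost of 2 for instruction:   %2 = zext <4 x i8> %1 to <4 x i64>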
+define void @zext_v4i8_to_v4i64(<4 x i8>* %a) {
+; SSE2: zext_v4i8_to_v4i64
+; SSE2: cost of 4 {{.*}} zext
+;
+; SSE41: zext_v4i8_to_v4i64
+; SSE41: cost of 2 {{.*}} zext
+;
+ %1 = load <4 x i8>, <4 x i8>* %a
+ %2 = zext <4 x i8> %1 to <4 x i64>
+ store <4 x i64> %2, <4 x i64>* undef, align 4
+ ret void
+}
+
+define void @sext_v4i8_to_v4i64(<4 x i8>* %a) {
+; SSE2: sext_v4i8_to_v4i64
+; SSE2: cost of 8 {{.*}} sext
+;
+; SSE41: sext_v4i8_to_v4i64
+; SSE41: cost of 2 {{.*}} sext
+;
+ %1 = load <4 x i8>, <4 x i8>* %a
+ %2 = sext <4 x i8> %1 to <4 x i64>
+ store <4 x i64> %2, <4 x i64>* undef, align 4
+ ret void
+}
+
+define void @zext_v4i16_to_v4i64(<4 x i16>* %a) {
+; SSE2: zext_v4i16_to_v4i64
+; SSE2: cost of 3 {{.*}} zext
+;
+; SSE41: zext_v4i16_to_v4i64
+; SSE41: cost of 2 {{.*}} zext
+;
+ %1 = load <4 x i16>, <4 x i16>* %a
+ %2 = zext <4 x i16> %1 to <4 x i64>
+ store <4 x i64> %2, <4 x i64>* undef, align 4
+ ret void
+}
+
+define void @sext_v4i16_to_v4i64(<4 x i16>* %a) {
+; SSE2: sext_v4i16_to_v4i64
+; SSE2: cost of 10 {{.*}} sext
+;
+; SSE41: sext_v4i16_to_v4i64
+; SSE41: cost of 2 {{.*}} sext
+;
+ %1 = load <4 x i16>, <4 x i16>* %a
+ %2 = sext <4 x i16> %1 to <4 x i64>
+ store <4 x i64> %2, <4 x i64>* undef, align 4
+ ret void
+}
+
+define void @zext_v4i32_to_v4i64(<4 x i32>* %a) {
+; SSE2: zext_v4i32_to_v4i64
+; SSE2: cost of 3 {{.*}} zext
+;
+; SSE41: zext_v4i32_to_v4i64
+; SSE41: cost of 2 {{.*}} zext
+;
+ %1 = load <4 x i32>, <4 x i32>* %a
+ %2 = zext <4 x i32> %1 to <4 x i64>
+ store <4 x i64> %2, <4 x i64>* undef, align 4
+ ret void
+}
+
+define void @sext_v4i32_to_v4i64(<4 x i32>* %a) {
+; SSE2: sext_v4i32_to_v4i64
+; SSE2: cost of 5 {{.*}} sext
+;
+; SSE41: sext_v4i32_to_v4i64
+; SSE41: cost of 2 {{.*}} sext
+;
+ %1 = load <4 x i32>, <4 x i32>* %a
+ %2 = sext <4 x i32> %1 to <4 x i64>
+ store <4 x i64> %2, <4 x i64>* undef, align 4
+ ret void
+}
+
define void @zext_v16i16_to_v16i32(<16 x i16>* %a) {
; SSE2: zext_v16i16_to_v16i32
; SSE2: cost of 6 {{.*}} zext