[CostModel][X86] Fixed AVX1/AVX512 sdiv/udiv general costs for 256/512 bit integer vectors

author Simon Pilgrim <llvm-dev@redking.me.uk>

Thu, 20 Oct 2016 16:39:11 +0000 (16:39 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Thu, 20 Oct 2016 16:39:11 +0000 (16:39 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Thu, 20 Oct 2016 16:39:11 +0000 (16:39 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Thu, 20 Oct 2016 16:39:11 +0000 (16:39 +0000)
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp

index a37ecd590239fbc7e7356970b1bd5ff8383693a1..11bce7c46f51030c99a21453cec31f9d617a4034 100644 (file)
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -156,6 +156,25 @@ int X86TTIImpl::getArithmeticInstrCost(
        return LT.first * Entry->Cost;
    }
  
+  static const CostTblEntry AVX512BWCostTable[] = {
+    // Vectorizing division is a bad idea. See the SSE2 table for more comments.
+    { ISD::SDIV,  MVT::v64i8,  64*20 },
+    { ISD::SDIV,  MVT::v32i16, 32*20 },
+    { ISD::SDIV,  MVT::v16i32, 16*20 },
+    { ISD::SDIV,  MVT::v8i64,  8*20 },
+    { ISD::UDIV,  MVT::v64i8,  64*20 },
+    { ISD::UDIV,  MVT::v32i16, 32*20 },
+    { ISD::UDIV,  MVT::v16i32, 16*20 },
+    { ISD::UDIV,  MVT::v8i64,  8*20 },
+  };
+
+  // Look for AVX512BW lowering tricks for custom cases.
+  if (ST->hasBWI()) {
+    if (const auto *Entry = CostTableLookup(AVX512BWCostTable, ISD,
+                                            LT.second))
+      return LT.first * Entry->Cost;
+  }
+
    static const CostTblEntry AVX512CostTable[] = {
      { ISD::SHL,     MVT::v16i32,    1 },
      { ISD::SRL,     MVT::v16i32,    1 },
@@ -244,7 +263,16 @@ int X86TTIImpl::getArithmeticInstrCost(
      { ISD::SRA,  MVT::v16i16,     10 }, // extend/vpsravd/pack sequence.
      { ISD::SRA,  MVT::v2i64,       4 }, // srl/xor/sub sequence.
      { ISD::SRA,  MVT::v4i64,       4 }, // srl/xor/sub sequence.
+  };
+
+  // Look for AVX2 lowering tricks for custom cases.
+  if (ST->hasAVX2()) {
+    if (const auto *Entry = CostTableLookup(AVX2CustomCostTable, ISD,
+                                            LT.second))
+      return LT.first * Entry->Cost;
+  }
  
+  static const CostTblEntry AVXCustomCostTable[] = {
      // Vectorizing division is a bad idea. See the SSE2 table for more comments.
      { ISD::SDIV,  MVT::v32i8,  32*20 },
      { ISD::SDIV,  MVT::v16i16, 16*20 },
@@ -257,8 +285,8 @@ int X86TTIImpl::getArithmeticInstrCost(
    };
  
-  // Look for AVX2 lowering tricks for custom cases.
+  // Look for AVX lowering tricks for custom cases.
-  if (ST->hasAVX2()) {
-    if (const auto *Entry = CostTableLookup(AVX2CustomCostTable, ISD,
+  if (ST->hasAVX()) {
+    if (const auto *Entry = CostTableLookup(AVXCustomCostTable, ISD,
                                              LT.second))
        return LT.first * Entry->Cost;
    }
diff --git a/test/Analysis/CostModel/X86/div.ll b/test/Analysis/CostModel/X86/div.ll

index 21a008198dbe893908da9eca2f5654ae64751bbf..2fe58e19cf2b60733ed41b803249bcd3a0c5be5c 100644 (file)
--- a/test/Analysis/CostModel/X86/div.ll
+++ b/test/Analysis/CostModel/X86/div.ll
@@ -14,75 +14,49 @@ define i32 @sdiv() {
    ; CHECK: cost of 1 {{.*}} %I64 = sdiv
    %I64 = sdiv i64 undef, undef
    ; SSE: cost of 40 {{.*}} %V2i64 = sdiv
-  ; AVX1: cost of 40 {{.*}} %V2i64 = sdiv
-  ; AVX2: cost of 40 {{.*}} %V2i64 = sdiv
-  ; AVX512: cost of 40 {{.*}} %V2i64 = sdiv
+  ; AVX: cost of 40 {{.*}} %V2i64 = sdiv
    %V2i64 = sdiv <2 x i64> undef, undef
    ; SSE: cost of 80 {{.*}} %V4i64 = sdiv
-  ; AVX1: cost of 12 {{.*}} %V4i64 = sdiv
-  ; AVX2: cost of 80 {{.*}} %V4i64 = sdiv
-  ; AVX512: cost of 80 {{.*}} %V4i64 = sdiv
+  ; AVX: cost of 80 {{.*}} %V4i64 = sdiv
    %V4i64 = sdiv <4 x i64> undef, undef
    ; SSE: cost of 160 {{.*}} %V8i64 = sdiv
-  ; AVX1: cost of 24 {{.*}} %V8i64 = sdiv
-  ; AVX2: cost of 160 {{.*}} %V8i64 = sdiv
-  ; AVX512: cost of 24 {{.*}} %V8i64 = sdiv
+  ; AVX: cost of 160 {{.*}} %V8i64 = sdiv
    %V8i64 = sdiv <8 x i64> undef, undef
  
    ; CHECK: cost of 1 {{.*}} %I32 = sdiv
    %I32 = sdiv i32 undef, undef
    ; SSE: cost of 80 {{.*}} %V4i32 = sdiv
-  ; AVX1: cost of 80 {{.*}} %V4i32 = sdiv
-  ; AVX2: cost of 80 {{.*}} %V4i32 = sdiv
-  ; AVX512: cost of 80 {{.*}} %V4i32 = sdiv
+  ; AVX: cost of 80 {{.*}} %V4i32 = sdiv
    %V4i32 = sdiv <4 x i32> undef, undef
    ; SSE: cost of 160 {{.*}} %V8i32 = sdiv
-  ; AVX1: cost of 24 {{.*}} %V8i32 = sdiv
-  ; AVX2: cost of 160 {{.*}} %V8i32 = sdiv
-  ; AVX512: cost of 160 {{.*}} %V8i32 = sdiv
+  ; AVX: cost of 160 {{.*}} %V8i32 = sdiv
    %V8i32 = sdiv <8 x i32> undef, undef
    ; SSE: cost of 320 {{.*}} %V16i32 = sdiv
-  ; AVX1: cost of 48 {{.*}} %V16i32 = sdiv
-  ; AVX2: cost of 320 {{.*}} %V16i32 = sdiv
-  ; AVX512: cost of 48 {{.*}} %V16i32 = sdiv
+  ; AVX: cost of 320 {{.*}} %V16i32 = sdiv
    %V16i32 = sdiv <16 x i32> undef, undef
  
    ; CHECK: cost of 1 {{.*}} %I16 = sdiv
    %I16 = sdiv i16 undef, undef
    ; SSE: cost of 160 {{.*}} %V8i16 = sdiv
-  ; AVX1: cost of 160 {{.*}} %V8i16 = sdiv
-  ; AVX2: cost of 160 {{.*}} %V8i16 = sdiv
-  ; AVX512: cost of 160 {{.*}} %V8i16 = sdiv
+  ; AVX: cost of 160 {{.*}} %V8i16 = sdiv
    %V8i16 = sdiv <8 x i16> undef, undef
    ; SSE: cost of 320 {{.*}} %V16i16 = sdiv
-  ; AVX1: cost of 48 {{.*}} %V16i16 = sdiv
-  ; AVX2: cost of 320 {{.*}} %V16i16 = sdiv
-  ; AVX512: cost of 320 {{.*}} %V16i16 = sdiv
+  ; AVX: cost of 320 {{.*}} %V16i16 = sdiv
    %V16i16 = sdiv <16 x i16> undef, undef
    ; SSE: cost of 640 {{.*}} %V32i16 = sdiv
-  ; AVX1: cost of 96 {{.*}} %V32i16 = sdiv
-  ; AVX2: cost of 640 {{.*}} %V32i16 = sdiv
-  ; AVX512F: cost of 640 {{.*}} %V32i16 = sdiv
-  ; AVX512BW: cost of 96 {{.*}} %V32i16 = sdiv
+  ; AVX: cost of 640 {{.*}} %V32i16 = sdiv
    %V32i16 = sdiv <32 x i16> undef, undef
  
    ; CHECK: cost of 1 {{.*}} %I8 = sdiv
    %I8 = sdiv i8 undef, undef
    ; SSE: cost of 320 {{.*}} %V16i8 = sdiv
-  ; AVX1: cost of 320 {{.*}} %V16i8 = sdiv
-  ; AVX2: cost of 320 {{.*}} %V16i8 = sdiv
-  ; AVX512: cost of 320 {{.*}} %V16i8 = sdiv
+  ; AVX: cost of 320 {{.*}} %V16i8 = sdiv
    %V16i8 = sdiv <16 x i8> undef, undef
    ; SSE: cost of 640 {{.*}} %V32i8 = sdiv
-  ; AVX1: cost of 96 {{.*}} %V32i8 = sdiv
-  ; AVX2: cost of 640 {{.*}} %V32i8 = sdiv
-  ; AVX512: cost of 640 {{.*}} %V32i8 = sdiv
+  ; AVX: cost of 640 {{.*}} %V32i8 = sdiv
    %V32i8 = sdiv <32 x i8> undef, undef
    ; SSE: cost of 1280 {{.*}} %V64i8 = sdiv
-  ; AVX1: cost of 192 {{.*}} %V64i8 = sdiv
-  ; AVX2: cost of 1280 {{.*}} %V64i8 = sdiv
-  ; AVX512F: cost of 1280 {{.*}} %V64i8 = sdiv
-  ; AVX512BW: cost of 192 {{.*}} %V64i8 = sdiv
+  ; AVX: cost of 1280 {{.*}} %V64i8 = sdiv
    %V64i8 = sdiv <64 x i8> undef, undef
  
    ret i32 undef
@@ -93,75 +67,49 @@ define i32 @udiv() {
    ; CHECK: cost of 1 {{.*}} %I64 = udiv
    %I64 = udiv i64 undef, undef
    ; SSE: cost of 40 {{.*}} %V2i64 = udiv
-  ; AVX1: cost of 40 {{.*}} %V2i64 = udiv
-  ; AVX2: cost of 40 {{.*}} %V2i64 = udiv
-  ; AVX512: cost of 40 {{.*}} %V2i64 = udiv
+  ; AVX: cost of 40 {{.*}} %V2i64 = udiv
    %V2i64 = udiv <2 x i64> undef, undef
    ; SSE: cost of 80 {{.*}} %V4i64 = udiv
-  ; AVX1: cost of 12 {{.*}} %V4i64 = udiv
-  ; AVX2: cost of 80 {{.*}} %V4i64 = udiv
-  ; AVX512: cost of 80 {{.*}} %V4i64 = udiv
+  ; AVX: cost of 80 {{.*}} %V4i64 = udiv
    %V4i64 = udiv <4 x i64> undef, undef
    ; SSE: cost of 160 {{.*}} %V8i64 = udiv
-  ; AVX1: cost of 24 {{.*}} %V8i64 = udiv
-  ; AVX2: cost of 160 {{.*}} %V8i64 = udiv
-  ; AVX512: cost of 24 {{.*}} %V8i64 = udiv
+  ; AVX: cost of 160 {{.*}} %V8i64 = udiv
    %V8i64 = udiv <8 x i64> undef, undef
  
    ; CHECK: cost of 1 {{.*}} %I32 = udiv
    %I32 = udiv i32 undef, undef
    ; SSE: cost of 80 {{.*}} %V4i32 = udiv
-  ; AVX1: cost of 80 {{.*}} %V4i32 = udiv
-  ; AVX2: cost of 80 {{.*}} %V4i32 = udiv
-  ; AVX512: cost of 80 {{.*}} %V4i32 = udiv
+  ; AVX: cost of 80 {{.*}} %V4i32 = udiv
    %V4i32 = udiv <4 x i32> undef, undef
    ; SSE: cost of 160 {{.*}} %V8i32 = udiv
-  ; AVX1: cost of 24 {{.*}} %V8i32 = udiv
-  ; AVX2: cost of 160 {{.*}} %V8i32 = udiv
-  ; AVX512: cost of 160 {{.*}} %V8i32 = udiv
+  ; AVX: cost of 160 {{.*}} %V8i32 = udiv
    %V8i32 = udiv <8 x i32> undef, undef
    ; SSE: cost of 320 {{.*}} %V16i32 = udiv
-  ; AVX1: cost of 48 {{.*}} %V16i32 = udiv
-  ; AVX2: cost of 320 {{.*}} %V16i32 = udiv
-  ; AVX512: cost of 48 {{.*}} %V16i32 = udiv
+  ; AVX: cost of 320 {{.*}} %V16i32 = udiv
    %V16i32 = udiv <16 x i32> undef, undef
  
    ; CHECK: cost of 1 {{.*}} %I16 = udiv
    %I16 = udiv i16 undef, undef
    ; SSE: cost of 160 {{.*}} %V8i16 = udiv
-  ; AVX1: cost of 160 {{.*}} %V8i16 = udiv
-  ; AVX2: cost of 160 {{.*}} %V8i16 = udiv
-  ; AVX512: cost of 160 {{.*}} %V8i16 = udiv
+  ; AVX: cost of 160 {{.*}} %V8i16 = udiv
    %V8i16 = udiv <8 x i16> undef, undef
    ; SSE: cost of 320 {{.*}} %V16i16 = udiv
-  ; AVX1: cost of 48 {{.*}} %V16i16 = udiv
-  ; AVX2: cost of 320 {{.*}} %V16i16 = udiv
-  ; AVX512: cost of 320 {{.*}} %V16i16 = udiv
+  ; AVX: cost of 320 {{.*}} %V16i16 = udiv
    %V16i16 = udiv <16 x i16> undef, undef
    ; SSE: cost of 640 {{.*}} %V32i16 = udiv
-  ; AVX1: cost of 96 {{.*}} %V32i16 = udiv
-  ; AVX2: cost of 640 {{.*}} %V32i16 = udiv
-  ; AVX512F: cost of 640 {{.*}} %V32i16 = udiv
-  ; AVX512BW: cost of 96 {{.*}} %V32i16 = udiv
+  ; AVX: cost of 640 {{.*}} %V32i16 = udiv
    %V32i16 = udiv <32 x i16> undef, undef
  
    ; CHECK: cost of 1 {{.*}} %I8 = udiv
    %I8 = udiv i8 undef, undef
    ; SSE: cost of 320 {{.*}} %V16i8 = udiv
-  ; AVX1: cost of 320 {{.*}} %V16i8 = udiv
-  ; AVX2: cost of 320 {{.*}} %V16i8 = udiv
-  ; AVX512: cost of 320 {{.*}} %V16i8 = udiv
+  ; AVX: cost of 320 {{.*}} %V16i8 = udiv
    %V16i8 = udiv <16 x i8> undef, undef
    ; SSE: cost of 640 {{.*}} %V32i8 = udiv
-  ; AVX1: cost of 96 {{.*}} %V32i8 = udiv
-  ; AVX2: cost of 640 {{.*}} %V32i8 = udiv
-  ; AVX512: cost of 640 {{.*}} %V32i8 = udiv
+  ; AVX: cost of 640 {{.*}} %V32i8 = udiv
    %V32i8 = udiv <32 x i8> undef, undef
    ; SSE: cost of 1280 {{.*}} %V64i8 = udiv
-  ; AVX1: cost of 192 {{.*}} %V64i8 = udiv
-  ; AVX2: cost of 1280 {{.*}} %V64i8 = udiv
-  ; AVX512F: cost of 1280 {{.*}} %V64i8 = udiv
-  ; AVX512BW: cost of 192 {{.*}} %V64i8 = udiv
+  ; AVX: cost of 1280 {{.*}} %V64i8 = udiv
    %V64i8 = udiv <64 x i8> undef, undef
  
    ret i32 undef
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Thu, 20 Oct 2016 16:39:11 +0000 (16:39 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Thu, 20 Oct 2016 16:39:11 +0000 (16:39 +0000)
lib/Target/X86/X86TargetTransformInfo.cpp		patch \| blob \| history
test/Analysis/CostModel/X86/div.ll		patch \| blob \| history