return LT.first * AVX2UniformConstCostTable[Idx].Cost;
}
+ static const CostTblEntry<MVT::SimpleValueType> AVX512CostTable[] = { // AVX-512 arithmetic costs; each row is { ISD opcode, vector type, cost }.
+ { ISD::SHL, MVT::v16i32, 1 }, // Shifts on v16i32: all three kinds are listed at cost 1.
+ { ISD::SRL, MVT::v16i32, 1 },
+ { ISD::SRA, MVT::v16i32, 1 },
+ { ISD::SHL, MVT::v8i64, 1 }, // Shifts on v8i64: likewise cost 1 each.
+ { ISD::SRL, MVT::v8i64, 1 },
+ { ISD::SRA, MVT::v8i64, 1 },
+ }; // Searched with (ISD, LT.second) when ST->hasAVX512(); a hit is returned scaled by LT.first.
+
static const CostTblEntry<MVT::SimpleValueType> AVX2CostTable[] = {
// Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to
// customize them to detect the cases where shift amount is a scalar one.
{ ISD::UDIV, MVT::v4i64, 4*20 },
};
+ if (ST->hasAVX512()) { // Try the AVX-512 table first, ahead of the AVX2 handling that follows.
+ int Idx = CostTableLookup(AVX512CostTable, ISD, LT.second); // Keyed on the opcode and LT.second.
+ if (Idx != -1) // -1 is treated as "no matching row"; in that case fall through.
+ return LT.first * AVX512CostTable[Idx].Cost; // Table cost multiplied by LT.first.
+ }
// Look for AVX2 lowering tricks.
if (ST->hasAVX2()) {
if (ISD == ISD::SHL && LT.second == MVT::v16i16 &&
return LTSrc.first * SSE2ConvTbl[Idx].Cost;
}
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType> // AVX-512 cast costs; each row is { ISD opcode, destination type, source type, cost }.
+ AVX512ConversionTbl[] = {
+ { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 }, // Floating-point extend / round.
+ { ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 },
+ { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 1 },
+ { ISD::FP_ROUND, MVT::v16f32, MVT::v8f64, 3 },
+
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 1 }, // Integer truncation.
+ { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 1 },
+ { ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 1 },
+ { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 1 },
+ { ISD::TRUNCATE, MVT::v16i32, MVT::v8i64, 4 },
+
+ // v16i1 -> v16i32 - load + broadcast
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
+
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 1 }, // Integer sign/zero extension.
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v16i32, 3 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v16i32, 3 },
+
+ };
+
+ if (ST->hasAVX512()) { // Only consulted on subtargets reporting AVX-512.
+ int Idx = ConvertCostTableLookup(AVX512ConversionTbl, ISD, LTDest.second, // Destination type is passed before the source type.
+ LTSrc.second);
+ if (Idx != -1) // -1 is treated as "no matching row"; in that case continue with the code below.
+ return AVX512ConversionTbl[Idx].Cost; // The row cost is returned as-is, with no multiplier applied.
+ }
EVT SrcTy = TLI->getValueType(Src);
EVT DstTy = TLI->getValueType(Dst);
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2 },
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2 },
{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 4 },
+
+ { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 3 },
+ { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 3 },
};
static const TypeConversionCostTblEntry<MVT::SimpleValueType>
{ ISD::SETCC, MVT::v32i8, 1 },
};
+ static const CostTblEntry<MVT::SimpleValueType> AVX512CostTbl[] = { // AVX-512 compare costs; each row is { ISD opcode, vector type, cost }.
+ { ISD::SETCC, MVT::v8i64, 1 }, // Integer vector types.
+ { ISD::SETCC, MVT::v16i32, 1 },
+ { ISD::SETCC, MVT::v8f64, 1 }, // Floating-point vector types.
+ { ISD::SETCC, MVT::v16f32, 1 },
+ };
+
+ if (ST->hasAVX512()) { // Checked before the AVX2 table lookup that follows.
+ int Idx = CostTableLookup(AVX512CostTbl, ISD, MTy); // Keyed on the opcode and MTy.
+ if (Idx != -1) // -1 is treated as "no matching row"; in that case fall through.
+ return LT.first * AVX512CostTbl[Idx].Cost; // Table cost multiplied by LT.first.
+ }
+
if (ST->hasAVX2()) {
int Idx = CostTableLookup(AVX2CostTbl, ISD, MTy);
if (Idx != -1)
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX512
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX
;CHECK-AVX: cost of 4 {{.*}} zext
%D = zext <4 x i32> undef to <4 x i64>
+ ;CHECK-AVX512: cost of 3 {{.*}} %D1 = zext
+ %D1 = zext <16 x i32> undef to <16 x i64>
+
+ ;CHECK-AVX512: cost of 3 {{.*}} %D2 = sext
+ %D2 = sext <16 x i32> undef to <16 x i64>
+
+ ;CHECK-AVX512: cost of 1 {{.*}} %D3 = zext
+ %D3 = zext <16 x i16> undef to <16 x i32>
+ ;CHECK-AVX512: cost of 1 {{.*}} %D4 = zext
+ %D4 = zext <16 x i8> undef to <16 x i32>
+ ;CHECK-AVX512: cost of 2 {{.*}} %D5 = zext
+ %D5 = zext <16 x i1> undef to <16 x i32>
+
;CHECK-AVX2: cost of 2 {{.*}} trunc
;CHECK-AVX: cost of 4 {{.*}} trunc
%E = trunc <4 x i64> undef to <4 x i32>
;CHECK-AVX2: cost of 4 {{.*}} trunc
;CHECK-AVX: cost of 9 {{.*}} trunc
+ ;CHECK-AVX512: cost of 1 {{.*}} %G = trunc
%G = trunc <8 x i64> undef to <8 x i32>
+ ;CHECK-AVX512: cost of 4 {{.*}} %G1 = trunc
+ %G1 = trunc <16 x i64> undef to <16 x i32>
+
ret i32 undef
}
%D1 = uitofp <8 x i32> %d to <8 x float>
ret void
}
+
+define void @fp_conv(<8 x float> %a, <16 x float>%b) { ; Cost expectations for fpext/fptrunc between float and double vectors of 8 and 16 lanes.
+;CHECK-LABEL: for function 'fp_conv'
+ ; CHECK-AVX512: cost of 1 {{.*}} fpext
+ %A1 = fpext <8 x float> %a to <8 x double>
+
+ ; CHECK-AVX512: cost of 3 {{.*}} fpext
+ %A2 = fpext <16 x float> %b to <16 x double>
+
+ ; CHECK-AVX2: cost of 3 {{.*}} %A3 = fpext
+ ; CHECK-AVX512: cost of 1 {{.*}} %A3 = fpext
+ %A3 = fpext <8 x float> %a to <8 x double> ; Same conversion as %A1; here the expectations name the value and also cover the AVX2 run.
+
+ ; CHECK-AVX2: cost of 3 {{.*}} %A4 = fptrunc
+ ; CHECK-AVX512: cost of 1 {{.*}} %A4 = fptrunc
+ %A4 = fptrunc <8 x double> undef to <8 x float>
+
+ ; CHECK-AVX512: cost of 3 {{.*}} %A5 = fptrunc
+ %A5 = fptrunc <16 x double> undef to <16 x float> ; 16-lane narrowing; only the AVX-512 run has an expectation for it.
+ ret void
+}
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck -check-prefix=CHECK -check-prefix=AVX1 %s
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck -check-prefix=CHECK -check-prefix=AVX2 %s
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck -check-prefix=CHECK -check-prefix=AVX512 %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
;AVX2: cost of 1 {{.*}} fcmp
%E = fcmp olt <4 x double> undef, undef
+ ; AVX512: cost of 1 {{.*}} %E1 = fcmp
+ %E1 = fcmp olt <16 x float> undef, undef
+ ; AVX512: cost of 2 {{.*}} %E2 = fcmp
+ %E2 = fcmp olt <16 x double> undef, undef
+
; -- integers --
;AVX1: cost of 1 {{.*}} icmp
;AVX2: cost of 1 {{.*}} icmp
%M = icmp eq <32 x i8> undef, undef
+ ; AVX512: cost of 1 {{.*}} %M1 = icmp
+ %M1 = icmp eq <16 x i32> undef, undef
+ ; AVX512: cost of 2 {{.*}} %M2 = icmp
+ %M2 = icmp eq <16 x i64> undef, undef
+
;CHECK: cost of 0 {{.*}} ret
ret i32 undef
}