From: Simon Pilgrim
Date: Thu, 20 Oct 2016 17:16:38 +0000 (+0000)
Subject: [CostModel][X86] Added tests for sdiv/udiv costs for uniform const and uniform const...
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ca13ae18d80696cf230af3b9d07cf177428f778d;p=llvm

[CostModel][X86] Added tests for sdiv/udiv costs for uniform const and uniform const power-of-2

Shows poor costings in AVX1/AVX512BW for certain vector types

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@284748 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/test/Analysis/CostModel/X86/div.ll b/test/Analysis/CostModel/X86/div.ll
index 2fe58e19cf2..620420c24b2 100644
--- a/test/Analysis/CostModel/X86/div.ll
+++ b/test/Analysis/CostModel/X86/div.ll
@@ -114,3 +114,267 @@ define i32 @udiv() {
 
   ret i32 undef
 }
+
+; CHECK-LABEL: 'sdiv_uniformconst'
+define i32 @sdiv_uniformconst() {
+  ; CHECK: cost of 1 {{.*}} %I64 = sdiv
+  %I64 = sdiv i64 undef, 7
+  ; SSE: cost of 40 {{.*}} %V2i64 = sdiv
+  ; AVX: cost of 40 {{.*}} %V2i64 = sdiv
+  %V2i64 = sdiv <2 x i64> undef, <i64 7, i64 7>
+  ; SSE: cost of 80 {{.*}} %V4i64 = sdiv
+  ; AVX: cost of 80 {{.*}} %V4i64 = sdiv
+  %V4i64 = sdiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
+  ; SSE: cost of 160 {{.*}} %V8i64 = sdiv
+  ; AVX: cost of 160 {{.*}} %V8i64 = sdiv
+  %V8i64 = sdiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+
+  ; CHECK: cost of 1 {{.*}} %I32 = sdiv
+  %I32 = sdiv i32 undef, 7
+  ; SSE2: cost of 19 {{.*}} %V4i32 = sdiv
+  ; SSSE3: cost of 19 {{.*}} %V4i32 = sdiv
+  ; SSE42: cost of 15 {{.*}} %V4i32 = sdiv
+  ; AVX: cost of 15 {{.*}} %V4i32 = sdiv
+  %V4i32 = sdiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
+  ; SSE2: cost of 38 {{.*}} %V8i32 = sdiv
+  ; SSSE3: cost of 38 {{.*}} %V8i32 = sdiv
+  ; SSE42: cost of 30 {{.*}} %V8i32 = sdiv
+  ; AVX1: cost of 160 {{.*}} %V8i32 = sdiv
+  ; AVX2: cost of 15 {{.*}} %V8i32 = sdiv
+  ; AVX512: cost of 15 {{.*}} %V8i32 = sdiv
+  %V8i32 = sdiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  ; SSE2: cost of 76 {{.*}} %V16i32 = sdiv
+  ; SSSE3: cost of 76 {{.*}} %V16i32 = sdiv
+  ; SSE42: cost of 60 {{.*}} %V16i32 = sdiv
+  ; AVX1: cost of 320 {{.*}} %V16i32 = sdiv
+  ; AVX2: cost of 30 {{.*}} %V16i32 = sdiv
+  ; AVX512F: cost of 48 {{.*}} %V16i32 = sdiv
+  ; AVX512BW: cost of 320 {{.*}} %V16i32 = sdiv
+  %V16i32 = sdiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+
+  ; CHECK: cost of 1 {{.*}} %I16 = sdiv
+  %I16 = sdiv i16 undef, 7
+  ; SSE: cost of 6 {{.*}} %V8i16 = sdiv
+  ; AVX: cost of 6 {{.*}} %V8i16 = sdiv
+  %V8i16 = sdiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  ; SSE: cost of 12 {{.*}} %V16i16 = sdiv
+  ; AVX1: cost of 320 {{.*}} %V16i16 = sdiv
+  ; AVX2: cost of 6 {{.*}} %V16i16 = sdiv
+  ; AVX512: cost of 6 {{.*}} %V16i16 = sdiv
+  %V16i16 = sdiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  ; SSE: cost of 24 {{.*}} %V32i16 = sdiv
+  ; AVX1: cost of 640 {{.*}} %V32i16 = sdiv
+  ; AVX2: cost of 12 {{.*}} %V32i16 = sdiv
+  ; AVX512F: cost of 12 {{.*}} %V32i16 = sdiv
+  ; AVX512BW: cost of 640 {{.*}} %V32i16 = sdiv
+  %V32i16 = sdiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+
+  ; CHECK: cost of 1 {{.*}} %I8 = sdiv
+  %I8 = sdiv i8 undef, 7
+  ; SSE: cost of 320 {{.*}} %V16i8 = sdiv
+  ; AVX: cost of 320 {{.*}} %V16i8 = sdiv
+  %V16i8 = sdiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  ; SSE: cost of 640 {{.*}} %V32i8 = sdiv
+  ; AVX: cost of 640 {{.*}} %V32i8 = sdiv
+  %V32i8 = sdiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  ; SSE: cost of 1280 {{.*}} %V64i8 = sdiv
+  ; AVX: cost of 1280 {{.*}} %V64i8 = sdiv
+  %V64i8 = sdiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+
+  ret i32 undef
+}
+
+; CHECK-LABEL: 'udiv_uniformconst'
+define i32 @udiv_uniformconst() {
+  ; CHECK: cost of 1 {{.*}} %I64 = udiv
+  %I64 = udiv i64 undef, 7
+  ; SSE: cost of 40 {{.*}} %V2i64 = udiv
+  ; AVX: cost of 40 {{.*}} %V2i64 = udiv
+  %V2i64 = udiv <2 x i64> undef, <i64 7, i64 7>
+  ; SSE: cost of 80 {{.*}} %V4i64 = udiv
+  ; AVX: cost of 80 {{.*}} %V4i64 = udiv
+  %V4i64 = udiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
+  ; SSE: cost of 160 {{.*}} %V8i64 = udiv
+  ; AVX: cost of 160 {{.*}} %V8i64 = udiv
+  %V8i64 = udiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+
+  ; CHECK: cost of 1 {{.*}} %I32 = udiv
+  %I32 = udiv i32 undef, 7
+  ; SSE: cost of 15 {{.*}} %V4i32 = udiv
+  ; AVX: cost of 15 {{.*}} %V4i32 = udiv
+  %V4i32 = udiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
+  ; SSE: cost of 30 {{.*}} %V8i32 = udiv
+  ; AVX1: cost of 160 {{.*}} %V8i32 = udiv
+  ; AVX2: cost of 15 {{.*}} %V8i32 = udiv
+  ; AVX512: cost of 15 {{.*}} %V8i32 = udiv
+  %V8i32 = udiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  ; SSE: cost of 60 {{.*}} %V16i32 = udiv
+  ; AVX1: cost of 320 {{.*}} %V16i32 = udiv
+  ; AVX2: cost of 30 {{.*}} %V16i32 = udiv
+  ; AVX512F: cost of 48 {{.*}} %V16i32 = udiv
+  ; AVX512BW: cost of 320 {{.*}} %V16i32 = udiv
+  %V16i32 = udiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+
+  ; CHECK: cost of 1 {{.*}} %I16 = udiv
+  %I16 = udiv i16 undef, 7
+  ; SSE: cost of 6 {{.*}} %V8i16 = udiv
+  ; AVX: cost of 6 {{.*}} %V8i16 = udiv
+  %V8i16 = udiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  ; SSE: cost of 12 {{.*}} %V16i16 = udiv
+  ; AVX1: cost of 320 {{.*}} %V16i16 = udiv
+  ; AVX2: cost of 6 {{.*}} %V16i16 = udiv
+  ; AVX512: cost of 6 {{.*}} %V16i16 = udiv
+  %V16i16 = udiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  ; SSE: cost of 24 {{.*}} %V32i16 = udiv
+  ; AVX1: cost of 640 {{.*}} %V32i16 = udiv
+  ; AVX2: cost of 12 {{.*}} %V32i16 = udiv
+  ; AVX512F: cost of 12 {{.*}} %V32i16 = udiv
+  ; AVX512BW: cost of 640 {{.*}} %V32i16 = udiv
+  %V32i16 = udiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+
+  ; CHECK: cost of 1 {{.*}} %I8 = udiv
+  %I8 = udiv i8 undef, 7
+  ; SSE: cost of 320 {{.*}} %V16i8 = udiv
+  ; AVX: cost of 320 {{.*}} %V16i8 = udiv
+  %V16i8 = udiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  ; SSE: cost of 640 {{.*}} %V32i8 = udiv
+  ; AVX: cost of 640 {{.*}} %V32i8 = udiv
+  %V32i8 = udiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  ; SSE: cost of 1280 {{.*}} %V64i8 = udiv
+  ; AVX: cost of 1280 {{.*}} %V64i8 = udiv
+  %V64i8 = udiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+
+  ret i32 undef
+}
+
+; CHECK-LABEL: 'sdiv_uniformconstpow2'
+define i32 @sdiv_uniformconstpow2() {
+  ; CHECK: cost of 1 {{.*}} %I64 = sdiv
+  %I64 = sdiv i64 undef, 16
+  ; SSE: cost of 40 {{.*}} %V2i64 = sdiv
+  ; AVX: cost of 40 {{.*}} %V2i64 = sdiv
+  %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
+  ; SSE: cost of 80 {{.*}} %V4i64 = sdiv
+  ; AVX: cost of 80 {{.*}} %V4i64 = sdiv
+  %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+  ; SSE: cost of 160 {{.*}} %V8i64 = sdiv
+  ; AVX: cost of 160 {{.*}} %V8i64 = sdiv
+  %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+
+  ; CHECK: cost of 1 {{.*}} %I32 = sdiv
+  %I32 = sdiv i32 undef, 16
+  ; SSE2: cost of 19 {{.*}} %V4i32 = sdiv
+  ; SSSE3: cost of 19 {{.*}} %V4i32 = sdiv
+  ; SSE42: cost of 15 {{.*}} %V4i32 = sdiv
+  ; AVX: cost of 15 {{.*}} %V4i32 = sdiv
+  %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+  ; SSE2: cost of 38 {{.*}} %V8i32 = sdiv
+  ; SSSE3: cost of 38 {{.*}} %V8i32 = sdiv
+  ; SSE42: cost of 30 {{.*}} %V8i32 = sdiv
+  ; AVX1: cost of 160 {{.*}} %V8i32 = sdiv
+  ; AVX2: cost of 15 {{.*}} %V8i32 = sdiv
+  ; AVX512: cost of 15 {{.*}} %V8i32 = sdiv
+  %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+  ; SSE2: cost of 76 {{.*}} %V16i32 = sdiv
+  ; SSSE3: cost of 76 {{.*}} %V16i32 = sdiv
+  ; SSE42: cost of 60 {{.*}} %V16i32 = sdiv
+  ; AVX1: cost of 320 {{.*}} %V16i32 = sdiv
+  ; AVX2: cost of 30 {{.*}} %V16i32 = sdiv
+  ; AVX512F: cost of 48 {{.*}} %V16i32 = sdiv
+  ; AVX512BW: cost of 320 {{.*}} %V16i32 = sdiv
+  %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+
+  ; CHECK: cost of 1 {{.*}} %I16 = sdiv
+  %I16 = sdiv i16 undef, 16
+  ; SSE: cost of 6 {{.*}} %V8i16 = sdiv
+  ; AVX: cost of 6 {{.*}} %V8i16 = sdiv
+  %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+  ; SSE: cost of 12 {{.*}} %V16i16 = sdiv
+  ; AVX1: cost of 320 {{.*}} %V16i16 = sdiv
+  ; AVX2: cost of 6 {{.*}} %V16i16 = sdiv
+  ; AVX512: cost of 6 {{.*}} %V16i16 = sdiv
+  %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+  ; SSE: cost of 24 {{.*}} %V32i16 = sdiv
+  ; AVX1: cost of 640 {{.*}} %V32i16 = sdiv
+  ; AVX2: cost of 12 {{.*}} %V32i16 = sdiv
+  ; AVX512F: cost of 12 {{.*}} %V32i16 = sdiv
+  ; AVX512BW: cost of 640 {{.*}} %V32i16 = sdiv
+  %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+
+  ; CHECK: cost of 1 {{.*}} %I8 = sdiv
+  %I8 = sdiv i8 undef, 16
+  ; SSE: cost of 320 {{.*}} %V16i8 = sdiv
+  ; AVX: cost of 320 {{.*}} %V16i8 = sdiv
+  %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+  ; SSE: cost of 640 {{.*}} %V32i8 = sdiv
+  ; AVX: cost of 640 {{.*}} %V32i8 = sdiv
+  %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+  ; SSE: cost of 1280 {{.*}} %V64i8 = sdiv
+  ; AVX: cost of 1280 {{.*}} %V64i8 = sdiv
+  %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+
+  ret i32 undef
+}
+
+; CHECK-LABEL: 'udiv_uniformconstpow2'
+define i32 @udiv_uniformconstpow2() {
+  ; CHECK: cost of 1 {{.*}} %I64 = udiv
+  %I64 = udiv i64 undef, 16
+  ; SSE: cost of 40 {{.*}} %V2i64 = udiv
+  ; AVX: cost of 40 {{.*}} %V2i64 = udiv
+  %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
+  ; SSE: cost of 80 {{.*}} %V4i64 = udiv
+  ; AVX: cost of 80 {{.*}} %V4i64 = udiv
+  %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+  ; SSE: cost of 160 {{.*}} %V8i64 = udiv
+  ; AVX: cost of 160 {{.*}} %V8i64 = udiv
+  %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+
+  ; CHECK: cost of 1 {{.*}} %I32 = udiv
+  %I32 = udiv i32 undef, 16
+  ; SSE: cost of 15 {{.*}} %V4i32 = udiv
+  ; AVX: cost of 15 {{.*}} %V4i32 = udiv
+  %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+  ; SSE: cost of 30 {{.*}} %V8i32 = udiv
+  ; AVX1: cost of 160 {{.*}} %V8i32 = udiv
+  ; AVX2: cost of 15 {{.*}} %V8i32 = udiv
+  ; AVX512: cost of 15 {{.*}} %V8i32 = udiv
+  %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+  ; SSE: cost of 60 {{.*}} %V16i32 = udiv
+  ; AVX1: cost of 320 {{.*}} %V16i32 = udiv
+  ; AVX2: cost of 30 {{.*}} %V16i32 = udiv
+  ; AVX512F: cost of 48 {{.*}} %V16i32 = udiv
+  ; AVX512BW: cost of 320 {{.*}} %V16i32 = udiv
+  %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+
+  ; CHECK: cost of 1 {{.*}} %I16 = udiv
+  %I16 = udiv i16 undef, 16
+  ; SSE: cost of 6 {{.*}} %V8i16 = udiv
+  ; AVX: cost of 6 {{.*}} %V8i16 = udiv
+  %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+  ; SSE: cost of 12 {{.*}} %V16i16 = udiv
+  ; AVX1: cost of 320 {{.*}} %V16i16 = udiv
+  ; AVX2: cost of 6 {{.*}} %V16i16 = udiv
+  ; AVX512: cost of 6 {{.*}} %V16i16 = udiv
+  %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+  ; SSE: cost of 24 {{.*}} %V32i16 = udiv
+  ; AVX1: cost of 640 {{.*}} %V32i16 = udiv
+  ; AVX2: cost of 12 {{.*}} %V32i16 = udiv
+  ; AVX512F: cost of 12 {{.*}} %V32i16 = udiv
+  ; AVX512BW: cost of 640 {{.*}} %V32i16 = udiv
+  %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+
+  ; CHECK: cost of 1 {{.*}} %I8 = udiv
+  %I8 = udiv i8 undef, 16
+  ; SSE: cost of 320 {{.*}} %V16i8 = udiv
+  ; AVX: cost of 320 {{.*}} %V16i8 = udiv
+  %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+  ; SSE: cost of 640 {{.*}} %V32i8 = udiv
+  ; AVX: cost of 640 {{.*}} %V32i8 = udiv
+  %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+  ; SSE: cost of 1280 {{.*}} %V64i8 = udiv
+  ; AVX: cost of 1280 {{.*}} %V64i8 = udiv
+  %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+
+  ret i32 undef
+}
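
Note on running the test: the CHECK prefixes used above (SSE2, SSSE3, SSE42, AVX, AVX1, AVX2, AVX512, AVX512F, AVX512BW) are selected by the RUN lines at the top of test/Analysis/CostModel/X86/div.ll, which sit outside this hunk and so do not appear in the diff. A minimal sketch of how a cost-model test of this kind is typically driven; the triple and the exact prefix groupings below are assumptions for illustration, not copied from the file:

; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW

with similar RUN lines for the remaining prefix sets (SSSE3, SSE42, AVX512F, ...). Each run prints lines of the form "Cost Model: Found an estimated cost of N for instruction: ...", and the per-prefix "cost of N" comments in the functions above are matched against that output for the corresponding subtarget, which is how one IR file carries separate cost expectations for each ISA level.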