From ba60f1665674bed285cb97c8ee8035d8b5d5c081 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 8 May 2016 15:24:53 +0000 Subject: [PATCH] [CostModel][X86] Extended comparison instruction cost model tests to include SSE2/SSE3/SSSE3/SSE41/SSE42 targets git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268877 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Analysis/CostModel/X86/cmp.ll | 146 ++++++++++++++++++++++------- 1 file changed, 113 insertions(+), 33 deletions(-) diff --git a/test/Analysis/CostModel/X86/cmp.ll b/test/Analysis/CostModel/X86/cmp.ll index 469cd735f8a..80fbbc47089 100644 --- a/test/Analysis/CostModel/X86/cmp.ll +++ b/test/Analysis/CostModel/X86/cmp.ll @@ -1,64 +1,144 @@ -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck -check-prefix=CHECK -check-prefix=AVX1 %s -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck -check-prefix=CHECK -check-prefix=AVX2 %s -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck -check-prefix=CHECK -check-prefix=AVX512 %s +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=pentium4 | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE2 %s +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=yonah | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE3 %s +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSSE3 %s +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=penryn | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE41 %s +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE42 %s +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1 %s +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2 %s +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" define i32 @cmp(i32 %arg) { ; -- floats -- - ;AVX1: cost of 1 {{.*}} fcmp - ;AVX2: cost of 1 {{.*}} fcmp + ;SSE2: cost of 3 {{.*}} fcmp + ;SSE3: cost of 3 {{.*}} fcmp + ;SSSE3: cost of 3 {{.*}} fcmp + ;SSE41: cost of 3 {{.*}} fcmp + ;SSE42: cost of 1 {{.*}} fcmp + ;AVX: cost of 1 {{.*}} fcmp %A = fcmp olt <2 x float> undef, undef - ;AVX1: cost of 1 {{.*}} fcmp - ;AVX2: cost of 1 {{.*}} fcmp + + ;SSE2: cost of 7 {{.*}} fcmp + ;SSE3: cost of 7 {{.*}} fcmp + ;SSSE3: cost of 7 {{.*}} fcmp + ;SSE41: cost of 7 {{.*}} fcmp + ;SSE42: cost of 1 {{.*}} fcmp + ;AVX: cost of 1 {{.*}} fcmp %B = fcmp olt <4 x float> undef, undef - ;AVX1: cost of 1 {{.*}} fcmp - ;AVX2: cost of 1 {{.*}} fcmp + + ;SSE2: cost of 14 {{.*}} fcmp + ;SSE3: cost of 14 {{.*}} fcmp + ;SSSE3: cost of 14 {{.*}} fcmp + ;SSE41: cost of 14 {{.*}} fcmp + ;SSE42: cost of 2 {{.*}} fcmp + ;AVX: cost of 1 {{.*}} fcmp %C = fcmp olt <8 x float> undef, undef - ;AVX1: cost of 1 {{.*}} fcmp - ;AVX2: cost of 1 {{.*}} fcmp + + ;SSE2: cost of 3 {{.*}} fcmp + ;SSE3: cost of 3 {{.*}} fcmp + ;SSSE3: cost of 3 {{.*}} fcmp + ;SSE41: cost of 3 {{.*}} fcmp + ;SSE42: cost of 1 {{.*}} fcmp + ;AVX: cost of 1 {{.*}} fcmp %D = fcmp olt <2 x double> undef, undef - ;AVX1: cost of 1 {{.*}} fcmp - ;AVX2: cost of 1 {{.*}} fcmp + + ;SSE2: cost of 6 {{.*}} fcmp + ;SSE3: cost of 6 {{.*}} fcmp + ;SSSE3: cost of 6 {{.*}} fcmp + ;SSE41: cost of 6 {{.*}} fcmp + ;SSE42: cost of 2 {{.*}} fcmp + ;AVX: cost of 1 {{.*}} fcmp %E = fcmp olt <4 x double> undef, undef ; AVX512: cost of 1 {{.*}} %E1 = fcmp %E1 = fcmp olt <16 x float> undef, undef - ; AVX512: cost of 2 {{.*}} %E2 = fcmp - %E2 = fcmp olt <16 x double> undef, undef + + ; AVX512: cost of 1 {{.*}} %E2 = fcmp + %E2 = fcmp olt <8 x double> undef, undef + + ; AVX512: cost of 2 {{.*}} %E3 = fcmp + %E3 = fcmp olt <16 x double> undef, undef ; -- integers -- - ;AVX1: cost of 1 {{.*}} icmp - ;AVX2: cost of 1 {{.*}} icmp + ;SSE2: cost of 1 {{.*}} icmp + ;SSE3: cost of 1 {{.*}} icmp + ;SSSE3: cost of 1 {{.*}} icmp + ;SSE41: cost of 1 {{.*}} icmp + ;SSE42: cost of 1 {{.*}} icmp + ;AVX: cost of 1 {{.*}} icmp %F = icmp eq <16 x i8> undef, undef - ;AVX1: cost of 1 {{.*}} icmp - ;AVX2: cost of 1 {{.*}} icmp + + ;SSE2: cost of 1 {{.*}} icmp + ;SSE3: cost of 1 {{.*}} icmp + ;SSSE3: cost of 1 {{.*}} icmp + ;SSE41: cost of 1 {{.*}} icmp + ;SSE42: cost of 1 {{.*}} icmp + ;AVX: cost of 1 {{.*}} icmp %G = icmp eq <8 x i16> undef, undef - ;AVX1: cost of 1 {{.*}} icmp - ;AVX2: cost of 1 {{.*}} icmp + + ;SSE2: cost of 1 {{.*}} icmp + ;SSE3: cost of 1 {{.*}} icmp + ;SSSE3: cost of 1 {{.*}} icmp + ;SSE41: cost of 1 {{.*}} icmp + ;SSE42: cost of 1 {{.*}} icmp + ;AVX: cost of 1 {{.*}} icmp %H = icmp eq <4 x i32> undef, undef - ;AVX1: cost of 1 {{.*}} icmp - ;AVX2: cost of 1 {{.*}} icmp + + ;SSE2: cost of 1 {{.*}} icmp + ;SSE3: cost of 1 {{.*}} icmp + ;SSSE3: cost of 1 {{.*}} icmp + ;SSE41: cost of 1 {{.*}} icmp + ;SSE42: cost of 1 {{.*}} icmp + ;AVX: cost of 1 {{.*}} icmp %I = icmp eq <2 x i64> undef, undef - ;AVX1: cost of 4 {{.*}} icmp - ;AVX2: cost of 1 {{.*}} icmp + + ;SSE2: cost of 2 {{.*}} icmp + ;SSE3: cost of 2 {{.*}} icmp + ;SSSE3: cost of 2 {{.*}} icmp + ;SSE41: cost of 2 {{.*}} icmp + ;SSE42: cost of 2 {{.*}} icmp + ;AVX1: cost of 4 {{.*}} icmp + ;AVX2: cost of 1 {{.*}} icmp %J = icmp eq <4 x i64> undef, undef - ;AVX1: cost of 4 {{.*}} icmp - ;AVX2: cost of 1 {{.*}} icmp + + ;SSE2: cost of 2 {{.*}} icmp + ;SSE3: cost of 2 {{.*}} icmp + ;SSSE3: cost of 2 {{.*}} icmp + ;SSE41: cost of 2 {{.*}} icmp + ;SSE42: cost of 2 {{.*}} icmp + ;AVX1: cost of 4 {{.*}} icmp + ;AVX2: cost of 1 {{.*}} icmp %K = icmp eq <8 x i32> undef, undef - ;AVX1: cost of 4 {{.*}} icmp - ;AVX2: cost of 1 {{.*}} icmp + + ;SSE2: cost of 2 {{.*}} icmp + ;SSE3: cost of 2 {{.*}} icmp + ;SSSE3: cost of 2 {{.*}} icmp + ;SSE41: cost of 2 {{.*}} icmp + ;SSE42: cost of 2 {{.*}} icmp + ;AVX1: cost of 4 {{.*}} icmp + ;AVX2: cost of 1 {{.*}} icmp %L = icmp eq <16 x i16> undef, undef - ;AVX1: cost of 4 {{.*}} icmp - ;AVX2: cost of 1 {{.*}} icmp + + ;SSE2: cost of 2 {{.*}} icmp + ;SSE3: cost of 2 {{.*}} icmp + ;SSSE3: cost of 2 {{.*}} icmp + ;SSE41: cost of 2 {{.*}} icmp + ;SSE42: cost of 2 {{.*}} icmp + ;AVX1: cost of 4 {{.*}} icmp + ;AVX2: cost of 1 {{.*}} icmp %M = icmp eq <32 x i8> undef, undef ; AVX512: cost of 1 {{.*}} %M1 = icmp %M1 = icmp eq <16 x i32> undef, undef - ; AVX512: cost of 2 {{.*}} %M2 = icmp - %M2 = icmp eq <16 x i64> undef, undef + + ; AVX512: cost of 1 {{.*}} %M2 = icmp + %M2 = icmp eq <8 x i64> undef, undef + + ; AVX512: cost of 2 {{.*}} %M3 = icmp + %M3 = icmp eq <16 x i64> undef, undef ;CHECK: cost of 0 {{.*}} ret ret i32 undef -- 2.50.1