From 3ad5f1a3f251b852f7deefa6749a49148bb784f9 Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Wed, 1 Feb 2017 18:09:47 +0000 Subject: [PATCH] [X86] Extend single-source shuffle cost test to test more arches. NFC. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@293793 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../CostModel/X86/shuffle-single-src.ll | 151 +++++++++++++++--- 1 file changed, 129 insertions(+), 22 deletions(-) diff --git a/test/Analysis/CostModel/X86/shuffle-single-src.ll b/test/Analysis/CostModel/X86/shuffle-single-src.ll index a953ec17d80..ba1a59da2c2 100644 --- a/test/Analysis/CostModel/X86/shuffle-single-src.ll +++ b/test/Analysis/CostModel/X86/shuffle-single-src.ll @@ -1,30 +1,61 @@ -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake-avx512 | FileCheck %s --check-prefix=SKX +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+ssse3 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSSE3 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.2 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW ; ; 
Verify the cost model for 1 src shuffles ; -; SKX-LABEL: 'test_vXf64' +; CHECK-LABEL: 'test_vXf64' define void @test_vXf64(<4 x double> %src256, <8 x double> %src512, <16 x double> %src1024) { - ; SKX: cost of 1 {{.*}} %V256 = shufflevector + ; SSE2: cost of 4 {{.*}} %V256 = shufflevector + ; SSSE3: cost of 4 {{.*}} %V256 = shufflevector + ; SSE42: cost of 4 {{.*}} %V256 = shufflevector + ; AVX1: cost of 6 {{.*}} %V256 = shufflevector + ; AVX2: cost of 6 {{.*}} %V256 = shufflevector + ; AVX512: cost of 1 {{.*}} %V256 = shufflevector %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> - ; SKX: cost of 1 {{.*}} %V512 = shufflevector + ; SSE2: cost of 24 {{.*}} %V512 = shufflevector + ; SSSE3: cost of 24 {{.*}} %V512 = shufflevector + ; SSE42: cost of 24 {{.*}} %V512 = shufflevector + ; AVX1: cost of 12 {{.*}} %V512 = shufflevector + ; AVX2: cost of 12 {{.*}} %V512 = shufflevector + ; AVX512: cost of 1 {{.*}} %V512 = shufflevector %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> - ; SKX: cost of 2 {{.*}} %V1024 = shufflevector + ; SSE2: cost of 112 {{.*}} %V1024 = shufflevector + ; SSSE3: cost of 112 {{.*}} %V1024 = shufflevector + ; SSE42: cost of 112 {{.*}} %V1024 = shufflevector + ; AVX1: cost of 72 {{.*}} %V1024 = shufflevector + ; AVX2: cost of 72 {{.*}} %V1024 = shufflevector + ; AVX512: cost of 2 {{.*}} %V1024 = shufflevector %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> ret void } -; SKX-LABEL: 'test_vXi64' +; CHECK-LABEL: 'test_vXi64' define void @test_vXi64(<4 x i64> %src256, <8 x i64> %src512) { - ; SKX: cost of 1 {{.*}} %V256 = shufflevector + ; SSE2: cost of 8 {{.*}} %V256 = shufflevector + ; SSSE3: cost of 8 {{.*}} %V256 = shufflevector + ; SSE42: cost of 8 {{.*}} %V256 = shufflevector + ; AVX1: cost of 8 {{.*}} %V256 = shufflevector + ; AVX2: cost of 8 {{.*}} %V256 = shufflevector + ; AVX512: cost of 1 {{.*}} %V256 = shufflevector %V256 = shufflevector <4 x i64> %src256, 
<4 x i64> undef, <4 x i32> - ; SKX: cost of 1 {{.*}} %V512 = shufflevector + ; SSE2: cost of 48 {{.*}} %V512 = shufflevector + ; SSSE3: cost of 48 {{.*}} %V512 = shufflevector + ; SSE42: cost of 48 {{.*}} %V512 = shufflevector + ; AVX1: cost of 16 {{.*}} %V512 = shufflevector + ; AVX2: cost of 16 {{.*}} %V512 = shufflevector + ; AVX512: cost of 1 {{.*}} %V512 = shufflevector %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> ret void @@ -33,13 +64,28 @@ define void @test_vXi64(<4 x i64> %src256, <8 x i64> %src512) { ; CHECK-LABEL: 'test_vXf32' define void @test_vXf32(<4 x float> %src128, <8 x float> %src256, <16 x float> %src512) { - ; SKX: cost of 1 {{.*}} %V128 = shufflevector + ; SSE2: cost of 6 {{.*}} %V128 = shufflevector + ; SSSE3: cost of 6 {{.*}} %V128 = shufflevector + ; SSE42: cost of 6 {{.*}} %V128 = shufflevector + ; AVX1: cost of 6 {{.*}} %V128 = shufflevector + ; AVX2: cost of 6 {{.*}} %V128 = shufflevector + ; AVX512: cost of 1 {{.*}} %V128 = shufflevector %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> - ; SKX: cost of 1 {{.*}} %V256 = shufflevector + ; SSE2: cost of 12 {{.*}} %V256 = shufflevector + ; SSSE3: cost of 12 {{.*}} %V256 = shufflevector + ; SSE42: cost of 12 {{.*}} %V256 = shufflevector + ; AVX1: cost of 14 {{.*}} %V256 = shufflevector + ; AVX2: cost of 14 {{.*}} %V256 = shufflevector + ; AVX512: cost of 1 {{.*}} %V256 = shufflevector %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> - ; SKX: cost of 1 {{.*}} %V512 = shufflevector + ; SSE2: cost of 72 {{.*}} %V512 = shufflevector + ; SSSE3: cost of 72 {{.*}} %V512 = shufflevector + ; SSE42: cost of 72 {{.*}} %V512 = shufflevector + ; AVX1: cost of 28 {{.*}} %V512 = shufflevector + ; AVX2: cost of 28 {{.*}} %V512 = shufflevector + ; AVX512: cost of 1 {{.*}} %V512 = shufflevector %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> ret void @@ -48,16 +94,36 @@ define void @test_vXf32(<4 x float> %src128, 
<8 x float> %src256, <16 x float> % ; CHECK-LABEL: 'test_vXi32' define void @test_vXi32(<4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512, <32 x i32> %src1024) { - ; SKX: cost of 1 {{.*}} %V128 = shufflevector + ; SSE2: cost of 8 {{.*}} %V128 = shufflevector + ; SSSE3: cost of 8 {{.*}} %V128 = shufflevector + ; SSE42: cost of 8 {{.*}} %V128 = shufflevector + ; AVX1: cost of 8 {{.*}} %V128 = shufflevector + ; AVX2: cost of 8 {{.*}} %V128 = shufflevector + ; AVX512: cost of 1 {{.*}} %V128 = shufflevector %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> - ; SKX: cost of 1 {{.*}} %V256 = shufflevector + ; SSE2: cost of 16 {{.*}} %V256 = shufflevector + ; SSSE3: cost of 16 {{.*}} %V256 = shufflevector + ; SSE42: cost of 16 {{.*}} %V256 = shufflevector + ; AVX1: cost of 16 {{.*}} %V256 = shufflevector + ; AVX2: cost of 16 {{.*}} %V256 = shufflevector + ; AVX512: cost of 1 {{.*}} %V256 = shufflevector %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> - ; SKX: cost of 1 {{.*}} %V512 = shufflevector + ; SSE2: cost of 96 {{.*}} %V512 = shufflevector + ; SSSE3: cost of 96 {{.*}} %V512 = shufflevector + ; SSE42: cost of 96 {{.*}} %V512 = shufflevector + ; AVX1: cost of 32 {{.*}} %V512 = shufflevector + ; AVX2: cost of 32 {{.*}} %V512 = shufflevector + ; AVX512: cost of 1 {{.*}} %V512 = shufflevector %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> - ; SKX: cost of 2 {{.*}} %V1024 = shufflevector + ; SSE2: cost of 448 {{.*}} %V1024 = shufflevector + ; SSSE3: cost of 448 {{.*}} %V1024 = shufflevector + ; SSE42: cost of 448 {{.*}} %V1024 = shufflevector + ; AVX1: cost of 192 {{.*}} %V1024 = shufflevector + ; AVX2: cost of 192 {{.*}} %V1024 = shufflevector + ; AVX512: cost of 2 {{.*}} %V1024 = shufflevector %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> ret void } @@ -65,29 +131,70 @@ define void @test_vXi32(<4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512 ; CHECK-LABEL: 
'test_vXi16' define void @test_vXi16(<8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512, <64 x i16> %src1024) { - ; SKX: cost of 1 {{.*}} %V128 = shufflevector + ; SSE2: cost of 16 {{.*}} %V128 = shufflevector + ; SSSE3: cost of 16 {{.*}} %V128 = shufflevector + ; SSE42: cost of 16 {{.*}} %V128 = shufflevector + ; AVX1: cost of 16 {{.*}} %V128 = shufflevector + ; AVX2: cost of 16 {{.*}} %V128 = shufflevector + ; AVX512F: cost of 16 {{.*}} %V128 = shufflevector + ; AVX512BW: cost of 1 {{.*}} %V128 = shufflevector %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> - ; SKX: cost of 1 {{.*}} %V256 = shufflevector + ; SSE2: cost of 32 {{.*}} %V256 = shufflevector + ; SSSE3: cost of 32 {{.*}} %V256 = shufflevector + ; SSE42: cost of 32 {{.*}} %V256 = shufflevector + ; AVX1: cost of 32 {{.*}} %V256 = shufflevector + ; AVX2: cost of 32 {{.*}} %V256 = shufflevector + ; AVX512F: cost of 32 {{.*}} %V256 = shufflevector + ; AVX512BW: cost of 1 {{.*}} %V256 = shufflevector %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> - ; SKX: cost of 1 {{.*}} %V512 = shufflevector + ; SSE2: cost of 192 {{.*}} %V512 = shufflevector + ; SSSE3: cost of 192 {{.*}} %V512 = shufflevector + ; SSE42: cost of 192 {{.*}} %V512 = shufflevector + ; AVX1: cost of 64 {{.*}} %V512 = shufflevector + ; AVX2: cost of 64 {{.*}} %V512 = shufflevector + ; AVX512F: cost of 64 {{.*}} %V512 = shufflevector + ; AVX512BW: cost of 1 {{.*}} %V512 = shufflevector %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> - ; SKX: cost of 2 {{.*}} %V1024 = shufflevector + ; SSE2: cost of 896 {{.*}} %V1024 = shufflevector + ; SSSE3: cost of 896 {{.*}} %V1024 = shufflevector + ; SSE42: cost of 896 {{.*}} %V1024 = shufflevector + ; AVX1: cost of 384 {{.*}} %V1024 = shufflevector + ; AVX2: cost of 384 {{.*}} %V1024 = shufflevector + ; AVX512F: cost of 384 {{.*}} %V1024 = shufflevector + ; AVX512BW: cost of 2 {{.*}} %V1024 = shufflevector %V1024 = shufflevector <64 x 
i16> %src1024, <64 x i16> undef, <64 x i32> ret void } ; CHECK-LABEL: 'test_vXi8' define void @test_vXi8(<16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { - ; SKX: cost of 1 {{.*}} %V128 = shufflevector + ; SSE2: cost of 32 {{.*}} %V128 = shufflevector + ; SSSE3: cost of 32 {{.*}} %V128 = shufflevector + ; SSE42: cost of 32 {{.*}} %V128 = shufflevector + ; AVX1: cost of 32 {{.*}} %V128 = shufflevector + ; AVX2: cost of 32 {{.*}} %V128 = shufflevector + ; AVX512: cost of 1 {{.*}} %V128 = shufflevector %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> - ; SKX: cost of 3 {{.*}} %V256 = shufflevector + ; SSE2: cost of 64 {{.*}} %V256 = shufflevector + ; SSSE3: cost of 64 {{.*}} %V256 = shufflevector + ; SSE42: cost of 64 {{.*}} %V256 = shufflevector + ; AVX1: cost of 64 {{.*}} %V256 = shufflevector + ; AVX2: cost of 64 {{.*}} %V256 = shufflevector + ; AVX512F: cost of 64 {{.*}} %V256 = shufflevector + ; AVX512BW: cost of 3 {{.*}} %V256 = shufflevector %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> - ; SKX: cost of 8 {{.*}} %V512 = shufflevector + ; SSE2: cost of 384 {{.*}} %V512 = shufflevector + ; SSSE3: cost of 384 {{.*}} %V512 = shufflevector + ; SSE42: cost of 384 {{.*}} %V512 = shufflevector + ; AVX1: cost of 128 {{.*}} %V512 = shufflevector + ; AVX2: cost of 128 {{.*}} %V512 = shufflevector + ; AVX512F: cost of 128 {{.*}} %V512 = shufflevector + ; AVX512BW: cost of 8 {{.*}} %V512 = shufflevector %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ret void -- 2.50.1