From: Michael Kuperstein Date: Thu, 2 Feb 2017 20:27:13 +0000 (+0000) Subject: [X86] Add costs for non-AVX512 single-source permutation integer shuffles X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a7092d68da1fa3a3d170ff36329ecf6df0fd419b;p=llvm [X86] Add costs for non-AVX512 single-source permutation integer shuffles Differential Revision: https://reviews.llvm.org/D29416 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@293932 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 3ebfdd0ede7..7629e0c95c6 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -819,7 +819,14 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, { TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb { TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw - { TTI::SK_Alternate, MVT::v32i8, 1 } // vpblendvb + { TTI::SK_Alternate, MVT::v32i8, 1 }, // vpblendvb + + { TTI::SK_PermuteSingleSrc, MVT::v4i64, 1 }, // vpermq + { TTI::SK_PermuteSingleSrc, MVT::v8i32, 1 }, // vpermd + { TTI::SK_PermuteSingleSrc, MVT::v16i16, 4 }, // vperm2i128 + 2 * vpshufb + // + vpblendvb + { TTI::SK_PermuteSingleSrc, MVT::v32i8, 4 } // vperm2i128 + 2 * vpshufb + // + vpblendvb }; if (ST->hasAVX2()) @@ -876,7 +883,10 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, { TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb { TTI::SK_Alternate, MVT::v8i16, 3 }, // pshufb + pshufb + por - { TTI::SK_Alternate, MVT::v16i8, 3 } // pshufb + pshufb + por + { TTI::SK_Alternate, MVT::v16i8, 3 }, // pshufb + pshufb + por + + { TTI::SK_PermuteSingleSrc, MVT::v8i16, 1 }, // pshufb + { TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 } // pshufb }; if (ST->hasSSSE3()) @@ -901,7 +911,10 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd { TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps { TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por - { TTI::SK_Alternate, MVT::v16i8, 3 } // pand + pandn + por + { TTI::SK_Alternate, MVT::v16i8, 3 }, // pand + pandn + por + + { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // pshufd + { TTI::SK_PermuteSingleSrc, MVT::v4i32, 1 } // pshufd }; if (ST->hasSSE2()) diff --git a/test/Analysis/CostModel/X86/shuffle-single-src.ll b/test/Analysis/CostModel/X86/shuffle-single-src.ll index ba1a59da2c2..e43e1afcdf5 100644 --- a/test/Analysis/CostModel/X86/shuffle-single-src.ll +++ b/test/Analysis/CostModel/X86/shuffle-single-src.ll @@ -46,7 +46,7 @@ define void @test_vXi64(<4 x i64> %src256, <8 x i64> %src512) { ; SSSE3: cost of 8 {{.*}} %V256 = shufflevector ; SSE42: cost of 8 {{.*}} %V256 = shufflevector ; AVX1: cost of 8 {{.*}} %V256 = shufflevector - ; AVX2: cost of 8 {{.*}} %V256 = shufflevector + ; AVX2: cost of 1 {{.*}} %V256 = shufflevector ; AVX512: cost of 1 {{.*}} %V256 = shufflevector %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> @@ -94,11 +94,11 @@ define void @test_vXf32(<4 x float> %src128, <8 x float> %src256, <16 x float> % ; CHECK-LABEL: 'test_vXi32' define void @test_vXi32(<4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512, <32 x i32> %src1024) { - ; SSE2: cost of 8 {{.*}} %V128 = shufflevector - ; SSSE3: cost of 8 {{.*}} %V128 = shufflevector - ; SSE42: cost of 8 {{.*}} %V128 = shufflevector - ; AVX1: cost of 8 {{.*}} %V128 = shufflevector - ; AVX2: cost of 8 {{.*}} %V128 = shufflevector + ; SSE2: cost of 1 {{.*}} %V128 = shufflevector + ; SSSE3: cost of 1 {{.*}} %V128 = shufflevector + ; SSE42: cost of 1 {{.*}} %V128 = shufflevector + ; AVX1: cost of 1 {{.*}} %V128 = shufflevector + ; AVX2: cost of 1 {{.*}} %V128 = shufflevector ; AVX512: cost of 1 {{.*}} %V128 = shufflevector %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> @@ -106,7 +106,7 @@ define void @test_vXi32(<4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512 ; SSSE3: cost of 16 {{.*}} %V256 = shufflevector ; SSE42: cost of 16 {{.*}} %V256 = shufflevector ; AVX1: cost of 16 {{.*}} %V256 = shufflevector - ; AVX2: cost of 16 {{.*}} %V256 = shufflevector + ; AVX2: cost of 1 {{.*}} %V256 = shufflevector ; AVX512: cost of 1 {{.*}} %V256 = shufflevector %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> @@ -132,11 +132,11 @@ define void @test_vXi32(<4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512 define void @test_vXi16(<8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512, <64 x i16> %src1024) { ; SSE2: cost of 16 {{.*}} %V128 = shufflevector - ; SSSE3: cost of 16 {{.*}} %V128 = shufflevector - ; SSE42: cost of 16 {{.*}} %V128 = shufflevector - ; AVX1: cost of 16 {{.*}} %V128 = shufflevector - ; AVX2: cost of 16 {{.*}} %V128 = shufflevector - ; AVX512F: cost of 16 {{.*}} %V128 = shufflevector + ; SSSE3: cost of 1 {{.*}} %V128 = shufflevector + ; SSE42: cost of 1 {{.*}} %V128 = shufflevector + ; AVX1: cost of 1 {{.*}} %V128 = shufflevector + ; AVX2: cost of 1 {{.*}} %V128 = shufflevector + ; AVX512F: cost of 1 {{.*}} %V128 = shufflevector ; AVX512BW: cost of 1 {{.*}} %V128 = shufflevector %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> @@ -144,8 +144,8 @@ define void @test_vXi16(<8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src51 ; SSSE3: cost of 32 {{.*}} %V256 = shufflevector ; SSE42: cost of 32 {{.*}} %V256 = shufflevector ; AVX1: cost of 32 {{.*}} %V256 = shufflevector - ; AVX2: cost of 32 {{.*}} %V256 = shufflevector - ; AVX512F: cost of 32 {{.*}} %V256 = shufflevector + ; AVX2: cost of 4 {{.*}} %V256 = shufflevector + ; AVX512F: cost of 4 {{.*}} %V256 = shufflevector ; AVX512BW cost of 1 {{.*}} %V256 = shufflevector %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> @@ -172,10 +172,10 @@ define void @test_vXi16(<8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src51 ; CHECK-LABEL: 'test_vXi8' define void @test_vXi8(<16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { ; SSE2: cost of 32 {{.*}} %V128 = shufflevector - ; SSSE3: cost of 32 {{.*}} %V128 = shufflevector - ; SSE42: cost of 32 {{.*}} %V128 = shufflevector - ; AVX1: cost of 32 {{.*}} %V128 = shufflevector - ; AVX2: cost of 32 {{.*}} %V128 = shufflevector + ; SSSE3: cost of 1 {{.*}} %V128 = shufflevector + ; SSE42: cost of 1 {{.*}} %V128 = shufflevector + ; AVX1: cost of 1 {{.*}} %V128 = shufflevector + ; AVX2: cost of 1 {{.*}} %V128 = shufflevector ; AVX512: cost of 1 {{.*}} %V128 = shufflevector %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> @@ -183,8 +183,8 @@ define void @test_vXi8(<16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) ; SSSE3: cost of 64 {{.*}} %V256 = shufflevector ; SSE42: cost of 64 {{.*}} %V256 = shufflevector ; AVX1: cost of 64 {{.*}} %V256 = shufflevector - ; AVX2: cost of 64 {{.*}} %V256 = shufflevector - ; AVX512F: cost of 64 {{.*}} %V256 = shufflevector + ; AVX2: cost of 4 {{.*}} %V256 = shufflevector + ; AVX512F: cost of 4 {{.*}} %V256 = shufflevector ; AVX512BW: cost of 3 {{.*}} %V256 = shufflevector %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32>