From: Simon Pilgrim Date: Sat, 7 Jan 2017 15:37:50 +0000 (+0000) Subject: [X86][AVX512] Use lowerShuffleAsRepeatedMaskAndLanePermute for non-VBMI v64i8 shuffle... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f9fdf76b96fbd6567ac4caa2c02b0052cc6357d1;p=llvm [X86][AVX512] Use lowerShuffleAsRepeatedMaskAndLanePermute for non-VBMI v64i8 shuffles (PR31470) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291347 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 7f72ab17f61..ad58f449478 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -12965,6 +12965,12 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, if (Subtarget.hasVBMI()) return lowerVectorShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, DAG); + // Try to create an in-lane repeating shuffle mask and then shuffle the + // the results into the target lanes. + if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute( + DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG)) + return V; + // FIXME: Implement direct support for this type! return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG); } diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index de4839432b9..9167c09b43f 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -639,8 +639,7 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, { TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw { TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw - { TTI::SK_Reverse, MVT::v64i8, 6 }, // vextracti64x4 + 2*vperm2i128 - // + 2*pshufb + vinserti64x4 + { TTI::SK_Reverse, MVT::v64i8, 2 }, // pshufb + vshufi64x2 { TTI::SK_PermuteSingleSrc, MVT::v32i16, 1 }, // vpermw { TTI::SK_PermuteSingleSrc, MVT::v16i16, 1 }, // vpermw diff --git a/test/Analysis/CostModel/X86/shuffle-reverse.ll b/test/Analysis/CostModel/X86/shuffle-reverse.ll index a1bdda0690a..627d7985743 100644 --- a/test/Analysis/CostModel/X86/shuffle-reverse.ll +++ b/test/Analysis/CostModel/X86/shuffle-reverse.ll @@ -161,7 +161,7 @@ define void @test_vXi8(<16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) ; AVX1: cost of 8 {{.*}} %V512 = shufflevector ; AVX2: cost of 4 {{.*}} %V512 = shufflevector ; AVX512F: cost of 4 {{.*}} %V512 = shufflevector - ; AVX512BW: cost of 6 {{.*}} %V512 = shufflevector + ; AVX512BW: cost of 2 {{.*}} %V512 = shufflevector %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ret void diff --git a/test/CodeGen/X86/vector-shuffle-512-v64.ll b/test/CodeGen/X86/vector-shuffle-512-v64.ll index 2836d69a0fe..f4650ec741a 100644 --- a/test/CodeGen/X86/vector-shuffle-512-v64.ll +++ b/test/CodeGen/X86/vector-shuffle-512-v64.ll @@ -178,13 +178,8 @@ define <64 x i8> @shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_ ; ; AVX512BW-LABEL: shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_47_46_45_44_43_42_41_40_39_38_37_36_35_34_33_32_31_30_29_28_27_26_25_24_23_22_21_20_19_18_17_16_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00: ; AVX512BW: # BB#0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] -; AVX512BW-NEXT: vpshufb %ymm1, %ymm0, %ymm2 -; AVX512BW-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm2[2,3,0,1] -; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512BW-NEXT: vpshufb %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,47,46,45,44,43,42,41,40,39,38,37,36,35,34,33,32,63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48] +; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5,2,3,0,1] ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_47_46_45_44_43_42_41_40_39_38_37_36_35_34_33_32_31_30_29_28_27_26_25_24_23_22_21_20_19_18_17_16_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00: