From: Nikita Popov
Date: Sat, 7 Sep 2019 12:13:44 +0000 (+0000)
Subject: [X86] Fix pshuflw formation from repeated shuffle mask (PR43230)
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=df591a43ba24bb1e93b7480c079f5ea2a58592b6;p=llvm

[X86] Fix pshuflw formation from repeated shuffle mask (PR43230)

Fix for https://bugs.llvm.org/show_bug.cgi?id=43230.

When creating PSHUFLW from a repeated shuffle mask, we have to apply
the checks to the repeated mask, not the original one. For the test
case from PR43230 the inspected part of the original mask is all undef.

Differential Revision: https://reviews.llvm.org/D67314

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@371307 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 235d31bef8c..d3229296ba7 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -32041,8 +32041,8 @@ static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef<int> Mask,
   if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16) {
     SmallVector<int, 4> RepeatedMask;
     if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
-      ArrayRef<int> LoMask(Mask.data() + 0, 4);
-      ArrayRef<int> HiMask(Mask.data() + 4, 4);
+      ArrayRef<int> LoMask(RepeatedMask.data() + 0, 4);
+      ArrayRef<int> HiMask(RepeatedMask.data() + 4, 4);
 
       // PSHUFLW: permute lower 4 elements only.
       if (isUndefOrInRange(LoMask, 0, 4) &&
diff --git a/test/CodeGen/X86/vector-shuffle-256-v16.ll b/test/CodeGen/X86/vector-shuffle-256-v16.ll
index 89e51ab4be0..001288f87d7 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v16.ll
@@ -4777,23 +4777,14 @@ define <16 x i16> @pr43230(<16 x i16> %a, <16 x i16> %b) {
 ; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
-; AVX2-SLOW-LABEL: pr43230:
-; AVX2-SLOW:       # %bb.0:
-; AVX2-SLOW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX2-SLOW-NEXT:    vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
-; AVX2-SLOW-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
-; AVX2-SLOW-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
-; AVX2-SLOW-NEXT:    vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[26,27],zero,zero
-; AVX2-SLOW-NEXT:    retq
-;
-; AVX2-FAST-LABEL: pr43230:
-; AVX2-FAST:       # %bb.0:
-; AVX2-FAST-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX2-FAST-NEXT:    vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
-; AVX2-FAST-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
-; AVX2-FAST-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
-; AVX2-FAST-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-FAST-NEXT:    retq
+; AVX2-LABEL: pr43230:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
+; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
+; AVX2-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[26,27],zero,zero
+; AVX2-NEXT:    retq
 ;
 ; AVX512VL-LABEL: pr43230:
 ; AVX512VL:       # %bb.0:
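
The failure mode described in the commit message can be sketched in isolation. The snippet below is illustrative, not the LLVM implementation: isUndefOrInRange here only mirrors the semantics of the LLVM predicate of the same name, the repeated-mask loop is a simplified stand-in for is128BitLaneRepeatedShuffleMask (a unary shuffle is assumed), and the mask values are a hypothetical PR43230-like case where the low 128-bit lane of the mask is entirely undef.

// Standalone sketch of the bug: running the PSHUFLW check on the
// original mask instead of the per-lane repeated mask. Helpers and
// mask values are illustrative only.
#include <array>
#include <cstdio>

// Mirrors the LLVM predicate: every element is undef (-1) or in [Low, Hi).
static bool isUndefOrInRange(const int *M, int N, int Low, int Hi) {
  for (int i = 0; i < N; ++i)
    if (M[i] != -1 && (M[i] < Low || M[i] >= Hi))
      return false;
  return true;
}

int main() {
  // Hypothetical v16i16 mask: the low 128-bit lane (elements 0-7) is all
  // undef; the high lane selects elements 13,9,10,11,12,13,14,15.
  std::array<int, 16> Mask = {-1, -1, -1, -1, -1, -1, -1, -1,
                              13,  9, 10, 11, 12, 13, 14, 15};

  // Simplified per-128-bit-lane repeated mask: fold both lanes into eight
  // lane-relative entries, a defined lane overriding an undef one.
  std::array<int, 8> RepeatedMask;
  for (int j = 0; j < 8; ++j) {
    int Lo = Mask[j], Hi = Mask[8 + j];
    RepeatedMask[j] = Lo != -1 ? Lo % 8 : (Hi != -1 ? Hi % 8 : -1);
  }

  // Pre-fix check: the original mask's low four entries are all undef, so
  // the PSHUFLW applicability test spuriously succeeds and a shuffle with
  // an essentially arbitrary immediate gets formed.
  printf("original LoMask passes: %d\n",
         isUndefOrInRange(Mask.data(), 4, 0, 4));          // prints 1 (wrong)

  // Post-fix check: the repeated mask's low four entries are {5,1,2,3};
  // 5 falls outside [0,4), so PSHUFLW formation is correctly rejected.
  printf("repeated LoMask passes: %d\n",
         isUndefOrInRange(RepeatedMask.data(), 4, 0, 4));  // prints 0
}

The same mismatch existed for the high half: the HiMask/PSHUFHW test likewise read the original mask, which is why the fix simply points both ArrayRefs at RepeatedMask.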