From: Simon Pilgrim
Date: Sun, 18 Aug 2019 17:15:26 +0000 (+0000)
Subject: [X86] isTargetShuffleEquivalent - add BUILD_VECTOR matching
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b79d9984556288d385e73d5bd782c9998b61dfcc;p=llvm

[X86] isTargetShuffleEquivalent - add BUILD_VECTOR matching

Add similar functionality to isShuffleEquivalent - if the mask elements
don't match, try matching the BUILD_VECTOR scalars instead.

As target shuffles need to handle SM_Sentinel values, this can get a bit
tricky, so this commit just adds actual mask element index handling - full
SM_SentinelZero support will be added when the need arises.

Also enables support in matchVectorShuffleWithPACK.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@369212 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 6c739d8aad6..ac616ffa8ba 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -10078,7 +10078,9 @@ static bool isShuffleEquivalent(SDValue V1, SDValue V2, ArrayRef<int> Mask,
 /// SM_SentinelZero is accepted as a valid negative index but must match in
 /// both.
 static bool isTargetShuffleEquivalent(ArrayRef<int> Mask,
-                                      ArrayRef<int> ExpectedMask) {
+                                      ArrayRef<int> ExpectedMask,
+                                      SDValue V1 = SDValue(),
+                                      SDValue V2 = SDValue()) {
   int Size = Mask.size();
   if (Size != (int)ExpectedMask.size())
     return false;
@@ -10089,9 +10091,25 @@ static bool isTargetShuffleEquivalent(ArrayRef<int> Mask,
   if (!isUndefOrZeroOrInRange(Mask, 0, 2 * Size))
     return false;
 
+  // If the values are build vectors, we can look through them to find
+  // equivalent inputs that make the shuffles equivalent.
+  auto *BV1 = dyn_cast_or_null<BuildVectorSDNode>(V1);
+  auto *BV2 = dyn_cast_or_null<BuildVectorSDNode>(V2);
+  BV1 = ((BV1 && BV1->getNumOperands() != Size) ? nullptr : BV1);
+  BV2 = ((BV2 && BV2->getNumOperands() != Size) ? nullptr : BV2);
+
   for (int i = 0; i < Size; ++i) {
     if (Mask[i] == SM_SentinelUndef || Mask[i] == ExpectedMask[i])
       continue;
+    if (0 <= Mask[i] && 0 <= ExpectedMask[i]) {
+      auto *MaskBV = Mask[i] < Size ? BV1 : BV2;
+      auto *ExpectedBV = ExpectedMask[i] < Size ? BV1 : BV2;
+      if (MaskBV && ExpectedBV &&
+          MaskBV->getOperand(Mask[i] % Size) ==
+              ExpectedBV->getOperand(ExpectedMask[i] % Size))
+        continue;
+    }
+    // TODO - handle SM_Sentinel equivalences.
     return false;
   }
   return true;
@@ -10653,14 +10671,14 @@ static bool matchVectorShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1,
   // Try binary shuffle.
   SmallVector<int, 32> BinaryMask;
   createPackShuffleMask(VT, BinaryMask, false);
-  if (isTargetShuffleEquivalent(TargetMask, BinaryMask))
+  if (isTargetShuffleEquivalent(TargetMask, BinaryMask, V1, V2))
     if (MatchPACK(V1, V2))
       return true;
 
   // Try unary shuffle.
   SmallVector<int, 32> UnaryMask;
   createPackShuffleMask(VT, UnaryMask, true);
-  if (isTargetShuffleEquivalent(TargetMask, UnaryMask))
+  if (isTargetShuffleEquivalent(TargetMask, UnaryMask, V1))
     if (MatchPACK(V1, V1))
       return true;
 
diff --git a/test/CodeGen/X86/packss.ll b/test/CodeGen/X86/packss.ll
index 3b7dd912efe..e3bd9d9e6ed 100644
--- a/test/CodeGen/X86/packss.ll
+++ b/test/CodeGen/X86/packss.ll
@@ -267,16 +267,15 @@ define <16 x i8> @packsswb_icmp_zero_128(<8 x i16> %a0) {
 ; SSE-LABEL: packsswb_icmp_zero_128:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm1, %xmm1
-; SSE-NEXT:    pcmpeqw %xmm0, %xmm1
-; SSE-NEXT:    packsswb %xmm0, %xmm1
-; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm1[0],zero
+; SSE-NEXT:    pcmpeqw %xmm1, %xmm0
+; SSE-NEXT:    packsswb %xmm1, %xmm0
 ; SSE-NEXT:    ret{{[l|q]}}
 ;
 ; AVX-LABEL: packsswb_icmp_zero_128:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    ret{{[l|q]}}
   %1 = icmp eq <8 x i16> %a0, zeroinitializer
   %2 = sext <8 x i1> %1 to <8 x i8>
@@ -311,10 +310,11 @@ define <32 x i8> @packsswb_icmp_zero_256(<16 x i16> %a0) {
 ; SSE-NEXT:    pxor %xmm2, %xmm2
 ; SSE-NEXT:    pcmpeqw %xmm2, %xmm1
 ; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
-; SSE-NEXT:    packsswb %xmm0, %xmm0
-; SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
-; SSE-NEXT:    packsswb %xmm1, %xmm1
-; SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
+; SSE-NEXT:    pxor %xmm3, %xmm3
+; SSE-NEXT:    packsswb %xmm0, %xmm3
+; SSE-NEXT:    packsswb %xmm1, %xmm2
+; SSE-NEXT:    movdqa %xmm3, %xmm0
+; SSE-NEXT:    movdqa %xmm2, %xmm1
 ; SSE-NEXT:    ret{{[l|q]}}
 ;
 ; AVX1-LABEL: packsswb_icmp_zero_256:
@@ -323,9 +323,8 @@ define <32 x i8> @packsswb_icmp_zero_256(<16 x i16> %a0) {
 ; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,0,2,4,6,8,10,12,14]
-; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpshufb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpacksswb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    ret{{[l|q]}}
 ;
@@ -333,7 +332,7 @@ define <32 x i8> @packsswb_icmp_zero_256(<16 x i16> %a0) {
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,18,20,22,24,26,28,30]
+; AVX2-NEXT:    vpacksswb %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    ret{{[l|q]}}
   %1 = icmp eq <16 x i16> %a0, zeroinitializer
   %2 = sext <16 x i1> %1 to <16 x i16>
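---

For readers skimming the patch, below is a minimal standalone sketch of the
new look-through logic, not LLVM code: plain integer ids stand in for
BUILD_VECTOR scalar operands (equal ids model pointer-identical SDValues),
the BuildVec alias and the main() driver are hypothetical illustrations, and
the isUndefOrZeroOrInRange checks from the real function are omitted.

// toy_shuffle_equiv.cpp - illustration only; compiles with any C++11 compiler.
#include <cassert>
#include <cstdio>
#include <vector>

// Sentinel values used by X86 target shuffle masks.
constexpr int SM_SentinelUndef = -1;
constexpr int SM_SentinelZero = -2;

// Stand-in for a BUILD_VECTOR node: a list of scalar ids. An empty vector
// models "not a build vector" (dyn_cast_or_null returning nullptr).
using BuildVec = std::vector<int>;

static bool isTargetShuffleEquivalent(const std::vector<int> &Mask,
                                      const std::vector<int> &ExpectedMask,
                                      const BuildVec &V1 = {},
                                      const BuildVec &V2 = {}) {
  int Size = (int)Mask.size();
  if (Size != (int)ExpectedMask.size())
    return false;

  // Only use a build vector if it covers one full input (one operand per
  // mask element), mirroring the getNumOperands() != Size guard above.
  const BuildVec *BV1 = ((int)V1.size() == Size) ? &V1 : nullptr;
  const BuildVec *BV2 = ((int)V2.size() == Size) ? &V2 : nullptr;

  for (int i = 0; i < Size; ++i) {
    if (Mask[i] == SM_SentinelUndef || Mask[i] == ExpectedMask[i])
      continue;
    // The new fallback: if both masks reference real elements, compare the
    // underlying BUILD_VECTOR scalars instead of the element indices.
    // Indices < Size select from the first input, >= Size from the second.
    if (0 <= Mask[i] && 0 <= ExpectedMask[i]) {
      const BuildVec *MaskBV = Mask[i] < Size ? BV1 : BV2;
      const BuildVec *ExpectedBV = ExpectedMask[i] < Size ? BV1 : BV2;
      if (MaskBV && ExpectedBV &&
          (*MaskBV)[Mask[i] % Size] == (*ExpectedBV)[ExpectedMask[i] % Size])
        continue;
    }
    return false; // As in the patch: SM_SentinelZero equivalences are a TODO.
  }
  return true;
}

int main() {
  // V2 repeats V1's scalars, so a mask taking {4,5,6,7} (from the second
  // input) is equivalent to an expected mask taking {0,1,2,3} (from the
  // first), even though no element index matches.
  BuildVec V1 = {10, 11, 12, 13};
  BuildVec V2 = {10, 11, 12, 13};
  assert(isTargetShuffleEquivalent({4, 5, 6, 7}, {0, 1, 2, 3}, V1, V2));
  // Without the build vectors, the indices alone do not match.
  assert(!isTargetShuffleEquivalent({4, 5, 6, 7}, {0, 1, 2, 3}));
  std::puts("ok");
}

Running it prints "ok": the first assertion is the case this commit enables
(and is why matchVectorShuffleWithPACK now matches the pcmpeqw-against-zero
tests above), while the second shows the pre-patch index-only behaviour.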