/// SM_SentinelZero is accepted as a valid negative index but must match in
/// both.
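+///
+/// If V1/V2 build vector inputs are provided, mask indices that differ are
+/// still accepted when both resolve to the same build vector operand.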
static bool isTargetShuffleEquivalent(ArrayRef<int> Mask,
- ArrayRef<int> ExpectedMask) {
+ ArrayRef<int> ExpectedMask,
+ SDValue V1 = SDValue(),
+ SDValue V2 = SDValue()) {
int Size = Mask.size();
if (Size != (int)ExpectedMask.size())
return false;
if (!isUndefOrZeroOrInRange(Mask, 0, 2 * Size))
return false;
+ // If the values are build vectors, we can look through them to find
+ // equivalent inputs that make the shuffles equivalent.
+ auto *BV1 = dyn_cast_or_null<BuildVectorSDNode>(V1);
+ auto *BV2 = dyn_cast_or_null<BuildVectorSDNode>(V2);
+ BV1 = ((BV1 && Size != (int)BV1->getNumOperands()) ? nullptr : BV1);
+ BV2 = ((BV2 && Size != (int)BV2->getNumOperands()) ? nullptr : BV2);
+
for (int i = 0; i < Size; ++i) {
if (Mask[i] == SM_SentinelUndef || Mask[i] == ExpectedMask[i])
continue;
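+ // Resolve both indices into their source build vectors (if available)
+ // and accept the lane when the underlying scalar operands are identical.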
+ if (0 <= Mask[i] && 0 <= ExpectedMask[i]) {
+ auto *MaskBV = Mask[i] < Size ? BV1 : BV2;
+ auto *ExpectedBV = ExpectedMask[i] < Size ? BV1 : BV2;
+ if (MaskBV && ExpectedBV &&
+ MaskBV->getOperand(Mask[i] % Size) ==
+ ExpectedBV->getOperand(ExpectedMask[i] % Size))
+ continue;
+ }
+ // TODO - handle SM_Sentinel equivalences.
return false;
}
return true;
}
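For intuition, here is a minimal standalone sketch of the look-through idea above, using plain std::vector<int64_t> stand-ins for BuildVectorSDNode operands (all names are illustrative, not LLVM API):

#include <cassert>
#include <cstdint>
#include <vector>

constexpr int SentinelUndef = -1;

// Accept differing indices when both resolve to equal scalar values in the
// (optional) constant operand lists: V1 covers [0, Size), V2 [Size, 2*Size).
static bool masksEquivalent(const std::vector<int> &Mask,
                            const std::vector<int> &ExpectedMask,
                            const std::vector<int64_t> *V1 = nullptr,
                            const std::vector<int64_t> *V2 = nullptr) {
  int Size = (int)Mask.size();
  if (Size != (int)ExpectedMask.size())
    return false;
  for (int i = 0; i < Size; ++i) {
    if (Mask[i] == SentinelUndef || Mask[i] == ExpectedMask[i])
      continue;
    if (0 <= Mask[i] && 0 <= ExpectedMask[i]) {
      const std::vector<int64_t> *MaskV = Mask[i] < Size ? V1 : V2;
      const std::vector<int64_t> *ExpectedV = ExpectedMask[i] < Size ? V1 : V2;
      if (MaskV && ExpectedV &&
          (*MaskV)[Mask[i] % Size] == (*ExpectedV)[ExpectedMask[i] % Size])
        continue;
    }
    return false;
  }
  return true;
}

int main() {
  // A splat build vector holds equal values in every lane, so the permuted
  // indices below are still equivalent once we look through to the values.
  std::vector<int64_t> Splat = {0, 0, 0, 0};
  assert(masksEquivalent({0, 1, 2, 3}, {0, 3, 2, 1}, &Splat));
  // Without the operands to look through, raw indices must match exactly.
  assert(!masksEquivalent({0, 1, 2, 3}, {0, 3, 2, 1}));
  return 0;
}

In the PACK-matching caller below, the shuffle inputs are now threaded through so the equivalence check can apply this look-through: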
// Try binary shuffle.
SmallVector<int, 32> BinaryMask;
createPackShuffleMask(VT, BinaryMask, false);
- if (isTargetShuffleEquivalent(TargetMask, BinaryMask))
+ if (isTargetShuffleEquivalent(TargetMask, BinaryMask, V1, V2))
if (MatchPACK(V1, V2))
return true;
// Try unary shuffle.
SmallVector<int, 32> UnaryMask;
createPackShuffleMask(VT, UnaryMask, true);
- if (isTargetShuffleEquivalent(TargetMask, UnaryMask))
+ if (isTargetShuffleEquivalent(TargetMask, UnaryMask, V1))
if (MatchPACK(V1, V1))
return true;
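The masks being tested come from createPackShuffleMask: as a shuffle, each 128-bit lane of a PACKSS/PACKUS result is the even elements of the first input's lane followed by the even elements of the second input's lane, viewed at the narrower result width. A simplified standalone re-derivation of that mask (single stage only; names are illustrative, not the LLVM helper itself):

#include <cstdio>
#include <vector>

// Per 128-bit lane: take the even result-width elements of input 1, then of
// input 2 (offset by NumElts; a unary pack reuses input 1 for both halves).
static std::vector<int> packMask(unsigned NumElts, unsigned BitsPerElt,
                                 bool Unary) {
  unsigned NumLanes = (NumElts * BitsPerElt) / 128;
  unsigned EltsPerLane = 128 / BitsPerElt;
  unsigned Offset = Unary ? 0 : NumElts;
  std::vector<int> Mask;
  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
    for (unsigned Elt = 0; Elt != EltsPerLane; Elt += 2)
      Mask.push_back((int)(Elt + Lane * EltsPerLane));
    for (unsigned Elt = 0; Elt != EltsPerLane; Elt += 2)
      Mask.push_back((int)(Elt + Lane * EltsPerLane + Offset));
  }
  return Mask;
}

int main() {
  // v16i8 packed from two v8i16 inputs:
  //   binary: 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30
  //   unary:  0 2 4 6 8 10 12 14 0 2 4 6 8 10 12 14
  for (bool Unary : {false, true}) {
    for (int M : packMask(16, 8, Unary))
      std::printf("%d ", M);
    std::printf("\n");
  }
  return 0;
}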
; SSE-LABEL: packsswb_icmp_zero_128:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm1, %xmm1
-; SSE-NEXT: pcmpeqw %xmm0, %xmm1
-; SSE-NEXT: packsswb %xmm0, %xmm1
-; SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
+; SSE-NEXT: pcmpeqw %xmm1, %xmm0
+; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: packsswb_icmp_zero_128:
; AVX: # %bb.0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = icmp eq <8 x i16> %a0, zeroinitializer
%2 = sext <8 x i1> %1 to <8 x i8>
; SSE-LABEL: packsswb_icmp_zero_256:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: pcmpeqw %xmm2, %xmm1
; SSE-NEXT: pcmpeqw %xmm2, %xmm0
-; SSE-NEXT: packsswb %xmm0, %xmm0
-; SSE-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
-; SSE-NEXT: packsswb %xmm1, %xmm1
-; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
+; SSE-NEXT: pxor %xmm3, %xmm3
+; SSE-NEXT: packsswb %xmm0, %xmm3
+; SSE-NEXT: packsswb %xmm1, %xmm2
+; SSE-NEXT: movdqa %xmm3, %xmm0
+; SSE-NEXT: movdqa %xmm2, %xmm1
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: packsswb_icmp_zero_256:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,0,2,4,6,8,10,12,14]
-; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX2-LABEL: packsswb_icmp_zero_256:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,18,20,22,24,26,28,30]
+; AVX2-NEXT: vpacksswb %ymm0, %ymm1, %ymm0
; AVX2-NEXT: ret{{[l|q]}}
%1 = icmp eq <16 x i16> %a0, zeroinitializer
%2 = sext <16 x i1> %1 to <16 x i8>
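A hedged sanity check for the AVX2 change: simulating (V)PACKSSWB's per-128-bit-lane semantics shows that vpacksswb with the zero vector as its first (Intel-order) source reproduces exactly the byte pattern the removed VPSHUFB mask produced, zeros in bytes 0-7/16-23 and the compare bytes in 8-15/24-31. Illustrative code, not from the patch:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

static int8_t sat8(int16_t W) {
  return (int8_t)std::clamp<int16_t>(W, -128, 127);
}

// AVX2 PACKSSWB: each 128-bit lane of the result is the saturated bytes of
// A's lane followed by the saturated bytes of B's lane.
static std::vector<int8_t> packsswb256(const std::vector<int16_t> &A,
                                       const std::vector<int16_t> &B) {
  std::vector<int8_t> R(32);
  for (int Lane = 0; Lane != 2; ++Lane)
    for (int i = 0; i != 8; ++i) {
      R[Lane * 16 + i] = sat8(A[Lane * 8 + i]);
      R[Lane * 16 + 8 + i] = sat8(B[Lane * 8 + i]);
    }
  return R;
}

int main() {
  // Compare-with-zero words are all-ones (-1) or zero.
  std::vector<int16_t> Cmp(16);
  for (int i = 0; i != 16; ++i)
    Cmp[i] = (i % 3 == 0) ? -1 : 0;
  std::vector<int16_t> Zero(16, 0);

  std::vector<int8_t> Packed = packsswb256(Zero, Cmp);

  // Old lowering: VPSHUFB placed zeros in bytes 0-7 and 16-23 and the
  // truncated compare bytes in 8-15 and 24-31. The pack matches it exactly.
  for (int Lane = 0; Lane != 2; ++Lane)
    for (int i = 0; i != 8; ++i) {
      assert(Packed[Lane * 16 + i] == 0);
      assert(Packed[Lane * 16 + 8 + i] == (int8_t)Cmp[Lane * 8 + i]);
    }
  return 0;
}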