}
}
+ // Handle PSHUFD/VPERMILPI vXi32/vXf32 repeated patterns.
+ // AVX introduced the VPERMILPD/VPERMILPS float permutes; before then we
+ // had to use 2-input SHUFPD/SHUFPS shuffles (not handled here).
+ if ((MaskScalarSizeInBits == 64 || MaskScalarSizeInBits == 32) &&
+ !ContainsZeros && (AllowIntDomain || Subtarget.hasAVX())) {
+ SmallVector<int, 4> RepeatedMask;
+ if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
+ // Narrow the repeated mask to create 32-bit element permutes.
+ SmallVector<int, 4> WordMask = RepeatedMask;
+ if (MaskScalarSizeInBits == 64)
+ scaleShuffleMask(2, RepeatedMask, WordMask);
+
+ Shuffle = (AllowIntDomain ? X86ISD::PSHUFD : X86ISD::VPERMILPI);
+ ShuffleVT = (AllowIntDomain ? MVT::i32 : MVT::f32);
+ ShuffleVT = MVT::getVectorVT(ShuffleVT, InputSizeInBits / 32);
+ PermuteImm = getV4X86ShuffleImm(WordMask);
+ return true;
+ }
+ }
+
// Handle PSHUFLW/PSHUFHW vXi16 repeated patterns.
if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16) {
SmallVector<int, 4> RepeatedMask;
}
}
- // Ensure we don't contain any zero elements.
- if (ContainsZeros)
- return false;
-
- assert(llvm::all_of(Mask, [&](int M) {
- return SM_SentinelUndef <= M && M < (int)NumMaskElts;
- }) && "Expected unary shuffle");
-
- // We only support permutation of 32/64 bit elements after this.
- if (MaskScalarSizeInBits != 32 && MaskScalarSizeInBits != 64)
- return false;
-
- // AVX introduced the VPERMILPD/VPERMILPS float permutes, before then we
- // had to use 2-input SHUFPD/SHUFPS shuffles (not handled here).
- if ((AllowFloatDomain && !AllowIntDomain) && !Subtarget.hasAVX())
- return false;
-
- // We need a repeating shuffle mask for VPERMILPS/PSHUFD.
- SmallVector<int, 4> RepeatedMask;
- if (!is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask))
- return false;
-
- // Narrow the repeated mask for 32-bit element permutes.
- SmallVector<int, 4> WordMask = RepeatedMask;
- if (MaskScalarSizeInBits == 64)
- scaleShuffleMask(2, RepeatedMask, WordMask);
-
- Shuffle = (AllowFloatDomain ? X86ISD::VPERMILPI : X86ISD::PSHUFD);
- ShuffleVT = (AllowFloatDomain ? MVT::f32 : MVT::i32);
- ShuffleVT = MVT::getVectorVT(ShuffleVT, InputSizeInBits / 32);
- PermuteImm = getV4X86ShuffleImm(WordMask);
- return true;
+ return false;
}
// Attempt to match a combined unary shuffle mask against supported binary
; SSE2-NEXT: andnps %xmm5, %xmm0
; SSE2-NEXT: orps %xmm4, %xmm0
; SSE2-NEXT: cvtps2pd %xmm0, %xmm2
-; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE2-NEXT: cvtps2pd %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: retq
; SSE-X32-LABEL: extract_i64_1:
; SSE-X32: # BB#0:
; SSE-X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SSE-X32-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
+; SSE-X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-X32-NEXT: movq %xmm0, (%eax)
; SSE-X32-NEXT: retl
;