[&](SDValue &NewRoot, SmallVectorImpl<int> &NewMask,
SmallVectorImpl<SDValue> &NewInputs) -> bool {
assert(NewMask.empty() && NewInputs.empty() && "Non-empty shuffle mask/inputs");
- if (UnaryShuffle || V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
- V2.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
- !isa<ConstantSDNode>(V1.getOperand(1)) ||
- !isa<ConstantSDNode>(V2.getOperand(1)))
+ if (UnaryShuffle)
+ return false;
+
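+ // Peek through EXTRACT_SUBVECTOR nodes with constant indices to find the
+ // widest source vectors, recording each subvector offset.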
+ SDValue Src1 = V1, Src2 = V2;
+ unsigned Offset1 = 0, Offset2 = 0;
+ if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ isa<ConstantSDNode>(V1.getOperand(1))) {
+ Src1 = V1.getOperand(0);
+ Offset1 = V1.getConstantOperandVal(1);
+ }
+ if (V2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ isa<ConstantSDNode>(V2.getOperand(1))) {
+ Src2 = V2.getOperand(0);
+ Offset2 = V2.getConstantOperandVal(1);
+ }
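+ // If neither input was extracted at a non-zero offset there is nothing to
+ // widen, so bail out.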
+ if (Offset1 == 0 && Offset2 == 0)
return false;
// If the src vector types aren't the same, see if we can extend
// one to match the other.
- SDValue Src1 = V1.getOperand(0);
- SDValue Src2 = V2.getOperand(0);
if ((Src1.getValueType().getScalarType() !=
Src2.getValueType().getScalarType()) ||
!DAG.getTargetLoweringInfo().isTypeLegal(Src1.getValueType()) ||
}
}
- unsigned Offset1 = V1.getConstantOperandVal(1);
- unsigned Offset2 = V2.getConstantOperandVal(1);
assert(((Offset1 % VT1.getVectorNumElements()) == 0 &&
(Offset2 % VT2.getVectorNumElements()) == 0 &&
(Src1SizeInBits % RootSizeInBits) == 0 &&
;
; AVX512VBMI-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_61:
; AVX512VBMI: # %bb.0:
-; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512VBMI-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm3 = <u,u,u,u,1,5,9,13,u,u,u,u,u,u,u,u>
-; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm1, %xmm1
-; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm3 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u>
-; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm0, %xmm0
-; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm2 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u>
+; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm0, %xmm2
+; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; AVX512VBMI-NEXT: vpbroadcastq {{.*#+}} zmm2 = [4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321]
+; AVX512VBMI-NEXT: vpermb %zmm0, %zmm2, %zmm0
+; AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512VBMI-NEXT: vzeroupper
; AVX512VBMI-NEXT: retq
;
; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm0, %xmm2
; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX512VBMI-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,1,5,9,14,u,u,u,u,u,u,u,u]
-; AVX512VBMI-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,1,5,9,13,u,u,u,u,u,u,u,u]
-; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; AVX512VBMI-NEXT: vpbroadcastq {{.*#+}} zmm2 = [4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257]
+; AVX512VBMI-NEXT: vpermb %zmm0, %zmm2, %zmm0
; AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512VBMI-NEXT: vzeroupper
; AVX512VBMI-NEXT: retq
;
; AVX512VLVBMI-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; AVX512VLVBMI: # %bb.0:
-; AVX512VLVBMI-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,32,32,32,32,32,32,32,32,15,15,15,15,15,15,15,15,32,32,32,32,32,32,32,32]
-; AVX512VLVBMI-NEXT: # ymm2 = mem[0,1,0,1]
-; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0
+; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,16,16,16,16,16,16,16,16]
+; AVX512VLVBMI-NEXT: vpermt2b %xmm1, %xmm2, %xmm0
; AVX512VLVBMI-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <32 x i8> %shuffle