From: Simon Pilgrim Date: Fri, 28 Jun 2019 12:24:49 +0000 (+0000) Subject: [X86] CombineShuffleWithExtract - only require 1 source to be EXTRACT_SUBVECTOR X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ef0e051c6d035ff067d4f73fa2ed90bacfaafbae;p=llvm [X86] CombineShuffleWithExtract - only require 1 source to be EXTRACT_SUBVECTOR We were requiring that both shuffle operands were EXTRACT_SUBVECTORs, but we can relax this to only require one of them to be. Also, we shouldn't bother attempting this if both operands are from the lowest subvector (or not EXTRACT_SUBVECTOR at all). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364644 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2ad8ade1a98..c31e452ed7f 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -32042,16 +32042,26 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, [&](SDValue &NewRoot, SmallVectorImpl<int> &NewMask, SmallVectorImpl<SDValue> &NewInputs) -> bool { assert(NewMask.empty() && NewInputs.empty() && "Non-empty shuffle mask"); - if (UnaryShuffle || V1.getOpcode() != ISD::EXTRACT_SUBVECTOR || - V2.getOpcode() != ISD::EXTRACT_SUBVECTOR || - !isa<ConstantSDNode>(V1.getOperand(1)) || - !isa<ConstantSDNode>(V2.getOperand(1))) + if (UnaryShuffle) + return false; + + SDValue Src1 = V1, Src2 = V2; + unsigned Offset1 = 0, Offset2 = 0; + if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR && + isa<ConstantSDNode>(V1.getOperand(1))) { + Src1 = V1.getOperand(0); + Offset1 = V1.getConstantOperandVal(1); + } + if (V2.getOpcode() == ISD::EXTRACT_SUBVECTOR && + isa<ConstantSDNode>(V2.getOperand(1))) { + Src2 = V2.getOperand(0); + Offset2 = V2.getConstantOperandVal(1); + } + if (Offset1 == 0 && Offset2 == 0) return false; // If the src vector types aren't the same, see if we can extend // one to match the other. 
- SDValue Src1 = V1.getOperand(0); - SDValue Src2 = V2.getOperand(0); if ((Src1.getValueType().getScalarType() != Src2.getValueType().getScalarType()) || !DAG.getTargetLoweringInfo().isTypeLegal(Src1.getValueType()) || @@ -32075,8 +32085,6 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, } } - unsigned Offset1 = V1.getConstantOperandVal(1); - unsigned Offset2 = V2.getConstantOperandVal(1); assert(((Offset1 % VT1.getVectorNumElements()) == 0 && (Offset2 % VT2.getVectorNumElements()) == 0 && (Src1SizeInBits % RootSizeInBits) == 0 && diff --git a/test/CodeGen/X86/shuffle-vs-trunc-512-widen.ll b/test/CodeGen/X86/shuffle-vs-trunc-512-widen.ll index 656dcf9b64f..246ed3e6f54 100644 --- a/test/CodeGen/X86/shuffle-vs-trunc-512-widen.ll +++ b/test/CodeGen/X86/shuffle-vs-trunc-512-widen.ll @@ -716,18 +716,14 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_ ; ; AVX512VBMI-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_61: ; AVX512VBMI: # %bb.0: -; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512VBMI-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm3 = -; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm2, %xmm2 -; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm1, %xmm1 -; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm2 -; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm3 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm2, %xmm2 -; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm0, %xmm0 -; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] +; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm2 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm0, %xmm2 +; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm1 = 
xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; AVX512VBMI-NEXT: vpbroadcastq {{.*#+}} zmm2 = [4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321] +; AVX512VBMI-NEXT: vpermb %zmm0, %zmm2, %zmm0 +; AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; AVX512VBMI-NEXT: vzeroupper ; AVX512VBMI-NEXT: retq ; @@ -813,11 +809,8 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_ ; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm1, %xmm1 ; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm0, %xmm2 ; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm2 -; AVX512VBMI-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,1,5,9,14,u,u,u,u,u,u,u,u] -; AVX512VBMI-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,1,5,9,13,u,u,u,u,u,u,u,u] -; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; AVX512VBMI-NEXT: vpbroadcastq {{.*#+}} zmm2 = [4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257] +; AVX512VBMI-NEXT: vpermb %zmm0, %zmm2, %zmm0 ; AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; AVX512VBMI-NEXT: vzeroupper ; AVX512VBMI-NEXT: retq diff --git a/test/CodeGen/X86/shuffle-vs-trunc-512.ll b/test/CodeGen/X86/shuffle-vs-trunc-512.ll index c42f91f50c0..bf704873d6f 100644 --- a/test/CodeGen/X86/shuffle-vs-trunc-512.ll +++ b/test/CodeGen/X86/shuffle-vs-trunc-512.ll @@ -707,18 +707,14 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_ ; ; AVX512VBMI-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_61: ; AVX512VBMI: # %bb.0: -; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512VBMI-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm3 = 
-; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm2, %xmm2 -; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm1, %xmm1 -; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm2 -; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm3 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm2, %xmm2 -; AVX512VBMI-NEXT: vpshufb %xmm3, %xmm0, %xmm0 -; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] +; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm2 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm0, %xmm2 +; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; AVX512VBMI-NEXT: vpbroadcastq {{.*#+}} zmm2 = [4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321,4411615795313452321] +; AVX512VBMI-NEXT: vpermb %zmm0, %zmm2, %zmm0 +; AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; AVX512VBMI-NEXT: vzeroupper ; AVX512VBMI-NEXT: retq ; @@ -804,11 +800,8 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_ ; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm1, %xmm1 ; AVX512VBMI-NEXT: vpshufb %xmm2, %xmm0, %xmm2 ; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm2 -; AVX512VBMI-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,1,5,9,14,u,u,u,u,u,u,u,u] -; AVX512VBMI-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,1,5,9,13,u,u,u,u,u,u,u,u] -; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; AVX512VBMI-NEXT: vpbroadcastq {{.*#+}} zmm2 = 
[4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257,4483673389351380257] +; AVX512VBMI-NEXT: vpermb %zmm0, %zmm2, %zmm0 ; AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; AVX512VBMI-NEXT: vzeroupper ; AVX512VBMI-NEXT: retq diff --git a/test/CodeGen/X86/vector-shuffle-256-v32.ll b/test/CodeGen/X86/vector-shuffle-256-v32.ll index a58b6bd5471..8dfcffbe0a4 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -2994,9 +2994,8 @@ define <32 x i8> @shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_ ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: ; AVX512VLVBMI: # %bb.0: -; AVX512VLVBMI-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,32,32,32,32,32,32,32,32,15,15,15,15,15,15,15,15,32,32,32,32,32,32,32,32] -; AVX512VLVBMI-NEXT: # ymm2 = mem[0,1,0,1] -; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0 +; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,16,16,16,16,16,16,16,16] +; AVX512VLVBMI-NEXT: vpermt2b %xmm1, %xmm2, %xmm0 ; AVX512VLVBMI-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle